diff --git a/accumulo-handler/src/test/results/positive/accumulo_queries.q.out b/accumulo-handler/src/test/results/positive/accumulo_queries.q.out index de82857c25..f92fcf1a8f 100644 --- a/accumulo-handler/src/test/results/positive/accumulo_queries.q.out +++ b/accumulo-handler/src/test/results/positive/accumulo_queries.q.out @@ -41,6 +41,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-4 depends on stages: Stage-2, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage @@ -57,6 +58,13 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.accumulo_table_1 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -82,6 +90,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe name: default.accumulo_table_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE accumulo_table_1 SELECT * WHERE (key%2)=0 PREHOOK: type: QUERY @@ -501,9 +535,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-6 depends on stages: Stage-2, Stage-1, Stage-4 Stage-1 is a root stage - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-0 @@ -518,11 +554,18 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value, count + Column Types: int, string, int + Table: default.accumulo_table_3 + Stage: Stage-1 Pre Insert operator: Pre-Insert task - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -604,6 +647,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe name: default.accumulo_table_3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, count + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE accumulo_table_3 SELECT x.key, x.value, Y.count diff --git a/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out b/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out index 6621a4e204..50a75805ee 100644 --- a/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out +++ b/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out @@ -35,6 +35,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-12 depends on stages: Stage-2, Stage-1, Stage-5, Stage-11 + Stage-13 depends on stages: Stage-2, Stage-1, Stage-5, Stage-11 Stage-1 is a root stage Stage-4 is a root stage Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 @@ -44,6 +46,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-8 Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-0 @@ -58,6 +61,20 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x2 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -83,6 +100,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 50) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +128,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe name: default.src_x2 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-10 Conditional Operator @@ -151,6 +209,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src a insert overwrite table src_x1 select key,"" where a.key > 0 AND a.key < 50 diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 7c27d07024..01217f4f16 100644 --- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -257,6 +257,14 @@ public static void setColumnStatsState(Map params, List } } + public static boolean canColumnStatsMerge(Map params, String colName) { + if (params == null) { + return false; + } + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + return stats.columnStats.containsKey(colName); + } + public static void clearColumnStatsState(Map params) { if (params == null) { return; diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index fce8db3df1..00cfa0ff99 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1680,7 +1680,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal // Statistics HIVESTATSAUTOGATHER("hive.stats.autogather", true, "A flag to gather statistics (only basic) automatically during the INSERT OVERWRITE command."), - HIVESTATSCOLAUTOGATHER("hive.stats.column.autogather", false, + HIVESTATSCOLAUTOGATHER("hive.stats.column.autogather", true, "A flag to gather column statistics automatically."), HIVESTATSDBCLASS("hive.stats.dbclass", "fs", new PatternSet("custom", "fs"), "The storage that stores temporary Hive statistics. In filesystem based statistics collection ('fs'), \n" + diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes.q.out index 6876ca8775..b02bc17159 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -72,6 +73,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +122,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out index 79cf8fe1e5..db705735aa 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -68,6 +69,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -91,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: smallint, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out index fec58ef026..40c2dbcb5a 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -68,6 +69,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -91,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out index 1131478a7b..c75fa3bdb2 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +78,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +112,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(cast(src.key as tinyint), src.value) ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe' diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out index 8d3b95ece8..427286a945 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -72,6 +73,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +122,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml index 62364fe4ea..4985803f9b 100644 --- a/data/conf/hive-site.xml +++ b/data/conf/hive-site.xml @@ -302,12 +302,15 @@ true - hive.llap.io.allocator.direct false + + hive.stats.column.autogather + true + hive.materializedview.rewriting diff --git a/hbase-handler/src/test/results/positive/hbase_ddl.q.out b/hbase-handler/src/test/results/positive/hbase_ddl.q.out index 8cb88edb30..9be51ae5a6 100644 --- a/hbase-handler/src/test/results/positive/hbase_ddl.q.out +++ b/hbase-handler/src/test/results/positive/hbase_ddl.q.out @@ -41,6 +41,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-4 depends on stages: Stage-2, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage @@ -57,6 +58,13 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.hbase_table_1 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -82,6 +90,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.hbase_table_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0 PREHOOK: type: QUERY @@ -117,6 +151,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} hbase.mapred.output.outputtable kkk hbase.table.name hbase_table_0 #### A masked pattern was here #### @@ -164,6 +199,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} hbase.table.name hbase_table_0 #### A masked pattern was here #### numFiles 0 diff --git a/hbase-handler/src/test/results/positive/hbase_queries.q.out b/hbase-handler/src/test/results/positive/hbase_queries.q.out index 276b6b8328..035e62e1a3 100644 --- a/hbase-handler/src/test/results/positive/hbase_queries.q.out +++ b/hbase-handler/src/test/results/positive/hbase_queries.q.out @@ -41,6 +41,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-4 depends on stages: Stage-2, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage @@ -57,6 +58,13 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.hbase_table_1 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -82,6 +90,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.hbase_table_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0 PREHOOK: type: QUERY @@ -504,9 +538,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-6 depends on stages: Stage-2, Stage-1, Stage-4 Stage-1 is a root stage - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-0 @@ -521,11 +557,18 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value, count + Column Types: int, string, int + Table: default.hbase_table_3 + Stage: Stage-1 Pre Insert operator: Pre-Insert task - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -607,6 +650,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.hbase_table_3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, count + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE hbase_table_3 SELECT x.key, x.value, Y.count diff --git a/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out b/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out index 68a417d0c1..a8ba3f63b2 100644 --- a/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out +++ b/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out @@ -35,6 +35,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-12 depends on stages: Stage-2, Stage-1, Stage-5, Stage-11 + Stage-13 depends on stages: Stage-2, Stage-1, Stage-5, Stage-11 Stage-1 is a root stage Stage-4 is a root stage Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 @@ -44,6 +46,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-8 Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-0 @@ -58,6 +61,20 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x2 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -83,6 +100,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 50) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +128,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.src_x2 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-10 Conditional Operator @@ -151,6 +209,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src a insert overwrite table src_x1 select key,"" where a.key > 0 AND a.key < 50 diff --git a/hbase-handler/src/test/results/positive/hbasestats.q.out b/hbase-handler/src/test/results/positive/hbasestats.q.out index b7ef954944..72b3a42196 100644 --- a/hbase-handler/src/test/results/positive/hbasestats.q.out +++ b/hbase-handler/src/test/results/positive/hbasestats.q.out @@ -64,6 +64,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-4 depends on stages: Stage-2, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage @@ -80,6 +81,13 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, state, country, country_id + Column Types: string, string, string, int + Table: default.users + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -102,6 +110,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.users + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + outputColumnNames: key, state, country, country_id + Statistics: Num rows: 500 Data size: 133000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(state, 16), compute_stats(country, 16), compute_stats(country_id, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE users SELECT 'user1', 'IA', 'USA', 0 FROM src PREHOOK: type: QUERY @@ -131,6 +165,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -196,6 +231,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -252,7 +288,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} #### A masked pattern was here #### numFiles 0 numRows 2 @@ -323,6 +359,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} #### A masked pattern was here #### numFiles 0 numRows 2 diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out index e55b1c257e..59b239753d 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out @@ -78,6 +78,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -211,6 +212,14 @@ STAGE PLANS: Stats-Aggr Operator Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: false + PREHOOK: query: DROP TABLE table1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@table1 diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out index 663a572748..1651900838 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out @@ -43,6 +43,7 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-2 depends on stages: Stage-0, Stage-4 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-0 depends on stages: Stage-3, Stage-6 Stage-5 @@ -93,6 +94,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: id + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(id, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -132,6 +149,35 @@ STAGE PLANS: name: default.values__tmp__table__3 Truncated Path -> Alias: #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -169,6 +215,14 @@ STAGE PLANS: Stats-Aggr Operator Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out index 6e95fd123c..17db9dbd44 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out @@ -107,7 +107,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key @@ -129,7 +129,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key @@ -358,7 +358,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key @@ -380,7 +380,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out index 660cebba5f..3894ec4735 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out @@ -96,6 +96,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -229,6 +230,14 @@ STAGE PLANS: Stats-Aggr Operator Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: false + PREHOOK: query: DROP TABLE table1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@table1 diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out index 8052fd86ee..d29bd606b7 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out @@ -51,6 +51,7 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-2 depends on stages: Stage-0, Stage-4 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-0 depends on stages: Stage-3, Stage-6 Stage-5 @@ -79,7 +80,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -101,6 +102,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: id + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(id, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -140,6 +157,35 @@ STAGE PLANS: name: default.values__tmp__table__3 Truncated Path -> Alias: #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -153,7 +199,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -177,6 +223,14 @@ STAGE PLANS: Stats-Aggr Operator Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -191,7 +245,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -222,7 +276,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -244,7 +298,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -276,7 +330,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -310,7 +364,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -341,7 +395,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -363,7 +417,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id diff --git a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out index 2ababb1eec..02b8d77c46 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out @@ -33,6 +33,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -214,6 +215,41 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -248,6 +284,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.blobstore_table + Is Table Level Stats: true + PREHOOK: query: EXPLAIN EXTENDED FROM hdfs_table INSERT OVERWRITE TABLE blobstore_table SELECT hdfs_table.key GROUP BY hdfs_table.key ORDER BY hdfs_table.key PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED FROM hdfs_table INSERT OVERWRITE TABLE blobstore_table SELECT hdfs_table.key GROUP BY hdfs_table.key ORDER BY hdfs_table.key @@ -257,6 +301,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -438,6 +483,41 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -472,6 +552,14 @@ STAGE PLANS: Stats-Aggr Operator Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.blobstore_table + Is Table Level Stats: true + PREHOOK: query: DROP TABLE hdfs_table PREHOOK: type: DROPTABLE PREHOOK: Input: default@hdfs_table diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java index ad2baa2e26..e8ef4b97d6 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java @@ -44,6 +44,7 @@ public void testMTQueries1() throws Exception { util.getConf().setBoolean("hive.exec.submit.local.task.via.child", true); util.getConf().set("hive.stats.dbclass", "fs"); util.getConf().set("hive.mapred.mode", "nonstrict"); + util.getConf().set("hive.stats.column.autogather", "false"); } boolean success = QTestUtil.queryListRunnerMultiThreaded(qfiles, qts); if (!success) { diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java index 4a9af80fdc..96173c014e 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java @@ -201,6 +201,7 @@ private static void startMiniHS2(HiveConf conf) throws Exception { private static void startMiniHS2(HiveConf conf, boolean httpMode) throws Exception { conf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false); conf.setBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED, false); + conf.setBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER, false); MiniHS2.Builder builder = new MiniHS2.Builder().withConf(conf).cleanupLocalDirOnStartup(false); if (httpMode) { builder = builder.withHTTPTransport(); diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 489f375e6e..dbfd5234d1 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -460,6 +460,7 @@ minillaplocal.query.files=acid_globallimit.q,\ auto_sortmerge_join_8.q,\ auto_sortmerge_join_9.q,\ autoColumnStats_1.q,\ + autoColumnStats_10.q,\ autoColumnStats_2.q,\ bucket4.q,\ bucket_groupby.q,\ diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index ff8412c19e..74afa48104 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -1937,11 +1937,20 @@ public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld) ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index); ColumnStatisticsObj statsObjOld = map.get(statsObjNew.getColName()); if (statsObjOld != null) { + if (statsObjNew.getStatsData().getSetField() != statsObjOld.getStatsData().getSetField()) { + // because we already confirm that the stats is accurate + // it is impossible that the column types have been changed while the + // column stats is still accurate. + throw new RuntimeException("Column " + statsObjNew.getColName() + + "'s old type is different from new type. " + + "We can not merge stats in auto column stats gathering."); + } // If statsObjOld is found, we can merge. ColumnStatsMerger merger = ColumnStatsMergerFactory.getColumnStatsMerger(statsObjNew, statsObjOld); merger.merge(statsObjNew, statsObjOld); } + // otherwise we can just insert the new statsObj list.add(statsObjNew); } csNew.setStatsObj(list); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java index fe890e4e27..254398e2c1 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java @@ -49,12 +49,8 @@ private static int countNumBitVectors(String s) { public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsObjNew, ColumnStatisticsObj statsObjOld) { ColumnStatsMerger agg; - _Fields typeNew = statsObjNew.getStatsData().getSetField(); - _Fields typeOld = statsObjOld.getStatsData().getSetField(); - // make sure that they have the same type - typeNew = typeNew == typeOld ? typeNew : null; int numBitVectors = 0; - switch (typeNew) { + switch (statsObjNew.getStatsData().getSetField()) { case BOOLEAN_STATS: agg = new BooleanColumnStatsMerger(); break; @@ -97,7 +93,7 @@ public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsOb break; } default: - throw new IllegalArgumentException("Unknown stats type " + typeNew.toString()); + throw new IllegalArgumentException("Unknown stats type " + statsObjNew.getStatsData().getSetField()); } if (numBitVectors > 0) { agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java b/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java index f43992c85d..833afcd19f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java @@ -18,11 +18,12 @@ package org.apache.hadoop.hive.ql; +import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.NodeUtils; import org.apache.hadoop.hive.ql.exec.NodeUtils.Function; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.StatsTask; +import org.apache.hadoop.hive.ql.exec.BasicStatsTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskRunner; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; @@ -64,7 +65,7 @@ private Context ctx; private boolean shutdown; - final Map statsTasks = new HashMap(1); + final Map columnStatsTasks = new HashMap<>(1); public DriverContext() { } @@ -188,10 +189,12 @@ public void incCurJobNo(int amount) { public void prepare(QueryPlan plan) { // extract stats keys from StatsTask List> rootTasks = plan.getRootTasks(); - NodeUtils.iterateTask(rootTasks, StatsTask.class, new Function() { + NodeUtils.iterateTask(rootTasks, ColumnStatsTask.class, new Function() { @Override - public void apply(StatsTask statsTask) { - statsTasks.put(statsTask.getWork().getAggKey(), statsTask); + public void apply(ColumnStatsTask columnStatsTask) { + if(columnStatsTask.getWork().getBasicStatsWork()!=null) { + columnStatsTasks.put(columnStatsTask.getWork().getBasicStatsWork().getAggKey(), columnStatsTask); + } } }); } @@ -200,7 +203,7 @@ public void prepare(TaskRunner runner) { } public void finished(TaskRunner runner) { - if (statsTasks.isEmpty() || !(runner.getTask() instanceof MapRedTask)) { + if (columnStatsTasks.isEmpty() || !(runner.getTask() instanceof MapRedTask)) { return; } MapRedTask mapredTask = (MapRedTask) runner.getTask(); @@ -221,7 +224,7 @@ public void apply(FileSinkOperator fsOp) { } }); for (String statKey : statKeys) { - statsTasks.get(statKey).getWork().setSourceTask(mapredTask); + columnStatsTasks.get(statKey).getWork().getBasicStatsWork().setSourceTask(mapredTask); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/BasicStatsNoJobTask.java similarity index 97% rename from ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java rename to ql/src/java/org/apache/hadoop/hive/ql/exec/BasicStatsNoJobTask.java index 3807f434a7..5939f0a70a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/BasicStatsNoJobTask.java @@ -50,7 +50,7 @@ import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; -import org.apache.hadoop.hive.ql.plan.StatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputFormat; @@ -73,16 +73,16 @@ * rows. This task can be used for computing basic stats like numFiles, numRows, fileSize, * rawDataSize from ORC footer. **/ -public class StatsNoJobTask extends Task implements Serializable { +public class BasicStatsNoJobTask extends Task implements Serializable { private static final long serialVersionUID = 1L; - private static transient final Logger LOG = LoggerFactory.getLogger(StatsNoJobTask.class); + private static transient final Logger LOG = LoggerFactory.getLogger(BasicStatsNoJobTask.class); private ConcurrentMap partUpdates; private Table table; private String tableFullName; private JobConf jc = null; - public StatsNoJobTask() { + public BasicStatsNoJobTask() { super(); } @@ -143,7 +143,6 @@ public void run() { // get the list of partitions org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition(); Map parameters = tPart.getParameters(); - try { Path dir = new Path(tPart.getSd().getLocation()); long numRows = 0; @@ -176,6 +175,9 @@ public void run() { } if (statsAvailable) { + if (work.isClearColumnStats()) { + StatsSetupConst.clearColumnStatsState(parameters); + } parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(numRows)); parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize)); parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize)); @@ -285,7 +287,9 @@ private int aggregateStats(ExecutorService threadPool, Hive db) { parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles)); EnvironmentContext environmentContext = new EnvironmentContext(); environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK); - + if (work.isClearColumnStats()) { + StatsSetupConst.clearColumnStatsState(tTable.getParameters()); + } db.alterTable(tableFullName, new Table(tTable), environmentContext); String msg = "Table " + tableFullName + " stats: [" + toString(parameters) + ']'; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/BasicStatsTask.java similarity index 96% rename from ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java rename to ql/src/java/org/apache/hadoop/hive/ql/exec/BasicStatsTask.java index c22d69bb19..e50d047b13 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/BasicStatsTask.java @@ -54,7 +54,7 @@ import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.stats.StatsAggregator; import org.apache.hadoop.hive.ql.stats.StatsCollectionContext; @@ -73,28 +73,20 @@ * MetaStore layer covers all Thrift calls and provides better guarantees about the accuracy of * those stats. **/ -public class StatsTask extends Task implements Serializable { +public class BasicStatsTask extends Task implements Serializable { private static final long serialVersionUID = 1L; - private static transient final Logger LOG = LoggerFactory.getLogger(StatsTask.class); + private static transient final Logger LOG = LoggerFactory.getLogger(BasicStatsTask.class); private Table table; private Collection dpPartSpecs; - public StatsTask() { + public BasicStatsTask() { super(); dpPartSpecs = null; } @Override - protected void receiveFeed(FeedType feedType, Object feedValue) { - // this method should be called by MoveTask when there are dynamic partitions generated - if (feedType == FeedType.DYNAMIC_PARTITIONS) { - dpPartSpecs = (Collection) feedValue; - } - } - - @Override public int execute(DriverContext driverContext) { if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) { return 0; @@ -214,6 +206,9 @@ private int aggregateStats(Hive db) { } } + if (work.isClearColumnStats()) { + StatsSetupConst.clearColumnStatsState(tTable.getParameters()); + } getHive().alterTable(tableFullName, new Table(tTable), environmentContext); if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) { console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']'); @@ -313,6 +308,9 @@ public Void call() throws Exception { StatsSetupConst.TASK); } } + if (work.isClearColumnStats()) { + StatsSetupConst.clearColumnStatsState(parameters); + } updates.add(new Partition(table, tPart)); if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) { @@ -505,4 +503,12 @@ private String toString(Map parameters) { } return list; } + + public Collection getDpPartSpecs() { + return dpPartSpecs; + } + + public void setDpPartSpecs(Collection dpPartSpecs) { + this.dpPartSpecs = dpPartSpecs; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java index d96f432fee..c7c859e58d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java @@ -22,12 +22,13 @@ import java.io.Serializable; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; @@ -52,8 +53,11 @@ import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; +import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.session.SessionState; @@ -88,13 +92,15 @@ public ColumnStatsTask() { public void initialize(QueryState queryState, QueryPlan queryPlan, DriverContext ctx, CompilationOpContext opContext) { super.initialize(queryState, queryPlan, ctx, opContext); - work.initializeForFetch(opContext); - try { - JobConf job = new JobConf(conf); - ftOp = new FetchOperator(work.getfWork(), job); - } catch (Exception e) { - LOG.error(StringUtils.stringifyException(e)); - throw new RuntimeException(e); + if (work.getfWork() != null) { + work.initializeForFetch(opContext); + try { + JobConf job = new JobConf(conf); + ftOp = new FetchOperator(work.getfWork(), job); + } catch (Exception e) { + LOG.error(StringUtils.stringifyException(e)); + throw new RuntimeException(e); + } } } @@ -137,7 +143,8 @@ private void unpackDoubleStats(ObjectInspector oi, Object o, String fName, } else if (fName.equals("ndvbitvector")) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDoubleStats().setBitVectors(v);; + statsObj.getStatsData().getDoubleStats().setBitVectors(v); + ; } } @@ -158,12 +165,13 @@ private void unpackDecimalStats(ObjectInspector oi, Object o, String fName, } else if (fName.equals("ndvbitvector")) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDecimalStats().setBitVectors(v);; + statsObj.getStatsData().getDecimalStats().setBitVectors(v); + ; } } private Decimal convertToThriftDecimal(HiveDecimal d) { - return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short)d.scale()); + return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()); } private void unpackLongStats(ObjectInspector oi, Object o, String fName, @@ -178,12 +186,13 @@ private void unpackLongStats(ObjectInspector oi, Object o, String fName, long v = ((LongObjectInspector) oi).get(o); statsObj.getStatsData().getLongStats().setHighValue(v); } else if (fName.equals("min")) { - long v = ((LongObjectInspector) oi).get(o); + long v = ((LongObjectInspector) oi).get(o); statsObj.getStatsData().getLongStats().setLowValue(v); } else if (fName.equals("ndvbitvector")) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getLongStats().setBitVectors(v);; + statsObj.getStatsData().getLongStats().setBitVectors(v); + ; } } @@ -204,7 +213,8 @@ private void unpackStringStats(ObjectInspector oi, Object o, String fName, } else if (fName.equals("ndvbitvector")) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getStringStats().setBitVectors(v);; + statsObj.getStatsData().getStringStats().setBitVectors(v); + ; } } @@ -239,11 +249,12 @@ private void unpackDateStats(ObjectInspector oi, Object o, String fName, } else if (fName.equals("ndvbitvector")) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDateStats().setBitVectors(v);; + statsObj.getStatsData().getDateStats().setBitVectors(v); + ; } } - private void unpackPrimitiveObject (ObjectInspector oi, Object o, String fieldName, + private void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { if (o == null) { return; @@ -290,7 +301,7 @@ private void unpackPrimitiveObject (ObjectInspector oi, Object o, String fieldNa } else if (statsObj.getStatsData().isSetLongStats()) { unpackLongStats(oi, o, fieldName, statsObj); } else if (statsObj.getStatsData().isSetDoubleStats()) { - unpackDoubleStats(oi,o,fieldName, statsObj); + unpackDoubleStats(oi, o, fieldName, statsObj); } else if (statsObj.getStatsData().isSetStringStats()) { unpackStringStats(oi, o, fieldName, statsObj); } else if (statsObj.getStatsData().isSetBinaryStats()) { @@ -327,8 +338,8 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName, } } - private List constructColumnStatsFromPackedRows( - Hive db) throws HiveException, MetaException, IOException { + private List constructColumnStatsFromPackedRows(Hive db) throws HiveException, + MetaException, IOException { String currentDb = SessionState.get().getCurrentDatabase(); String tableName = work.getColStats().getTableName(); @@ -370,22 +381,54 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName, } } + List prunedStatsObjs = new ArrayList(); if (!isTblLevel) { List partVals = new ArrayList(); // Iterate over partition columns to figure out partition name for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) { - Object partVal = ((PrimitiveObjectInspector)fields.get(i).getFieldObjectInspector()). - getPrimitiveJavaObject(list.get(i)); + Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()) + .getPrimitiveJavaObject(list.get(i)); partVals.add(partVal == null ? // could be null for default partition - this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString()); + this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) + : partVal.toString()); } partName = Warehouse.makePartName(partColSchema, partVals); + if (work.getColStats().isNeedMerge()) { + // check all the columns stats + List partNames = new ArrayList<>(); + partNames.add(partName); + org.apache.hadoop.hive.ql.metadata.Partition partition = db + .getPartitionsByNames(tbl, partNames).iterator().next(); + for (ColumnStatisticsObj statsObj : statsObjs) { + if (StatsSetupConst.canColumnStatsMerge(partition.getParameters(), + statsObj.getColName())) { + prunedStatsObjs.add(statsObj); + } else { + LOG.info("Skip merging column stats for " + statsObj.getColName()); + } + } + } else { + prunedStatsObjs.addAll(statsObjs); + } + } else { + if (work.getColStats().isNeedMerge()) { + // check all the columns stats + for (ColumnStatisticsObj statsObj : statsObjs) { + if (StatsSetupConst.canColumnStatsMerge(tbl.getParameters(), statsObj.getColName())) { + prunedStatsObjs.add(statsObj); + } else { + LOG.info("Skip merging column stats for " + statsObj.getColName()); + } + } + } else { + prunedStatsObjs.addAll(statsObjs); + } } - String [] names = Utilities.getDbTableName(currentDb, tableName); + String[] names = Utilities.getDbTableName(currentDb, tableName); ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, isTblLevel); ColumnStatistics colStats = new ColumnStatistics(); colStats.setStatsDesc(statsDesc); - colStats.setStatsObj(statsObjs); + colStats.setStatsObj(prunedStatsObjs); if (!statsObjs.isEmpty()) { stats.add(colStats); } @@ -394,9 +437,8 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName, return stats; } - private ColumnStatisticsDesc getColumnStatsDesc(String dbName, String tableName, - String partName, boolean isTblLevel) - { + private ColumnStatisticsDesc getColumnStatsDesc(String dbName, String tableName, String partName, + boolean isTblLevel) { ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(); statsDesc.setDbName(dbName); statsDesc.setTableName(tableName); @@ -419,9 +461,7 @@ private int persistColumnStats(Hive db) throws HiveException, MetaException, IOE return 0; } SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats); - if (work.getColStats() != null && work.getColStats().getNumBitVector() > 0) { - request.setNeedMerge(true); - } + request.setNeedMerge(work.getColStats().isNeedMerge()); db.setPartitionColumnStatistics(request); return 0; } @@ -431,13 +471,38 @@ public int execute(DriverContext driverContext) { if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) { return 0; } - try { - Hive db = getHive(); - return persistColumnStats(db); - } catch (Exception e) { - LOG.error("Failed to run column stats task", e); + + // TODO: merge BasicStatsWork and BasicStatsNoJobWork + if (work.getBasicStatsWork() != null && work.getBasicStatsNoJobWork() != null) { + LOG.error("Can not have both basic stats work and stats no job work!"); + return 1; + } + boolean clearColumnStats = work.getfWork() == null; + if (work.getBasicStatsWork() != null) { + Task basicStatsTask = TaskFactory.get(work.getBasicStatsWork(), conf); + work.getBasicStatsWork().setClearColumnStats(clearColumnStats); + basicStatsTask.initialize(queryState, queryPlan, driverContext, null); + ((BasicStatsTask) basicStatsTask).setDpPartSpecs(dpPartSpecs); + ((BasicStatsTask) basicStatsTask).execute(driverContext); + } + if (work.getBasicStatsNoJobWork() != null) { + Task basicStatsTask = TaskFactory.get(work.getBasicStatsNoJobWork(), + conf); + work.getBasicStatsNoJobWork().setClearColumnStats(clearColumnStats); + basicStatsTask.initialize(queryState, queryPlan, driverContext, null); + ((BasicStatsNoJobTask) basicStatsTask).execute(driverContext); } - return 1; + + if (work.getfWork() != null) { + try { + Hive db = getHive(); + return persistColumnStats(db); + } catch (Exception e) { + LOG.error("Failed to run column stats task", e); + return 1; + } + } + return 0; } @Override @@ -449,4 +514,15 @@ public StageType getType() { public String getName() { return "COLUMNSTATS TASK"; } + + private Collection dpPartSpecs; + + @Override + protected void receiveFeed(FeedType feedType, Object feedValue) { + // this method should be called by MoveTask when there are dynamic + // partitions generated + if (feedType == FeedType.DYNAMIC_PARTITIONS) { + dpPartSpecs = (Collection) feedValue; + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index f329b5111b..e7b3c0f5cb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -233,7 +233,7 @@ private void releaseLocks(LoadTableDesc ltd) throws HiveException { // we check if there is only one immediate child task and it is stats task public boolean hasFollowingStatsTask() { if (this.getNumChild() == 1) { - return this.getChildTasks().get(0) instanceof StatsTask; + return this.getChildTasks().get(0) instanceof ColumnStatsTask; } return false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java index d61a4607ea..73574fbdc6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java @@ -48,8 +48,8 @@ import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.ReplCopyWork; import org.apache.hadoop.hive.ql.plan.SparkWork; -import org.apache.hadoop.hive.ql.plan.StatsNoJobWork; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.TezWork; /** @@ -93,9 +93,8 @@ public TaskTuple(Class workClass, Class> taskClass) { taskvec.add(new TaskTuple(MapredLocalWork.class, MapredLocalTask.class)); - taskvec.add(new TaskTuple(StatsWork.class, - StatsTask.class)); - taskvec.add(new TaskTuple(StatsNoJobWork.class, StatsNoJobTask.class)); + taskvec.add(new TaskTuple(BasicStatsWork.class, BasicStatsTask.class)); + taskvec.add(new TaskTuple(BasicStatsNoJobWork.class, BasicStatsNoJobTask.class)); taskvec.add(new TaskTuple(ColumnStatsWork.class, ColumnStatsTask.class)); taskvec.add(new TaskTuple(ColumnStatsUpdateWork.class, ColumnStatsUpdateTask.class)); taskvec.add(new TaskTuple(MergeFileWork.class, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 88c73f090b..2d217fdba3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1739,7 +1739,9 @@ public Partition loadPartition(Path loadPath, Table tbl, } // column stats will be inaccurate - StatsSetupConst.clearColumnStatsState(newTPart.getParameters()); + if (!hasFollowingStatsTask) { + StatsSetupConst.clearColumnStatsState(newTPart.getParameters()); + } // recreate the partition if it existed before if (isSkewedStoreAsSubdir) { @@ -2111,7 +2113,9 @@ public void loadTable(Path loadPath, String tableName, boolean replace, boolean } //column stats will be inaccurate - StatsSetupConst.clearColumnStatsState(tbl.getParameters()); + if (!hasFollowingStatsTask) { + StatsSetupConst.clearColumnStatsState(tbl.getParameters()); + } try { if (isSkewedStoreAsSubdir) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java index 9297a0b874..b287b37170 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.DriverContext; +import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; @@ -45,10 +46,11 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.StatsNoJobWork; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.mapred.InputFormat; /** @@ -99,7 +101,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // There will not be any MR or Tez job above this task - StatsNoJobWork snjWork = new StatsNoJobWork(op.getConf().getTableMetadata().getTableSpec()); + BasicStatsNoJobWork snjWork = new BasicStatsNoJobWork(op.getConf().getTableMetadata().getTableSpec()); snjWork.setStatsReliable(parseCtx.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); // If partition is specified, get pruned partition list @@ -111,7 +113,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, partCols, false); snjWork.setPrunedPartitionList(partList); } - Task snjTask = TaskFactory.get(snjWork, parseCtx.getConf()); + Task snjTask = TaskFactory.get(snjWork, parseCtx.getConf()); ctx.setCurrTask(snjTask); ctx.setCurrTopOp(null); ctx.getRootTasks().clear(); @@ -121,14 +123,15 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // The plan consists of a simple MapRedTask followed by a StatsTask. // The MR task is just a simple TableScanOperator - StatsWork statsWork = new StatsWork(op.getConf().getTableMetadata().getTableSpec()); + BasicStatsWork statsWork = new BasicStatsWork(op.getConf().getTableMetadata().getTableSpec()); statsWork.setAggKey(op.getConf().getStatsAggPrefix()); statsWork.setStatsTmpDir(op.getConf().getTmpStatsDir()); statsWork.setSourceTask(currTask); statsWork.setStatsReliable(parseCtx.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - Task statsTask = TaskFactory.get(statsWork, parseCtx.getConf()); - currTask.addDependentTask(statsTask); + ColumnStatsWork columnStatsWork = new ColumnStatsWork(statsWork); + Task columnStatsTask = TaskFactory.get(columnStatsWork, parseCtx.getConf()); + currTask.addDependentTask(columnStatsTask); if (!ctx.getRootTasks().contains(currTask)) { ctx.getRootTasks().add(currTask); } @@ -136,15 +139,15 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // The plan consists of a StatsTask only. if (noScan) { - statsTask.setParentTasks(null); + columnStatsTask.setParentTasks(null); statsWork.setNoScanAnalyzeCommand(true); ctx.getRootTasks().remove(currTask); - ctx.getRootTasks().add(statsTask); + ctx.getRootTasks().add(columnStatsTask); } // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; if (partialScan) { - handlePartialScanCommand(op, ctx, parseCtx, currTask, statsWork, statsTask); + handlePartialScanCommand(op, ctx, parseCtx, currTask, statsWork, columnStatsTask); } currWork.getMapWork().setGatheringStats(true); @@ -188,7 +191,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, */ private void handlePartialScanCommand(TableScanOperator op, GenMRProcContext ctx, ParseContext parseCtx, Task currTask, - StatsWork statsWork, Task statsTask) throws SemanticException { + BasicStatsWork statsWork, Task statsTask) throws SemanticException { String aggregationKey = op.getConf().getStatsAggPrefix(); StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey); List inputPaths = GenMapRedUtils.getInputPathsForPartialScan(op, aggregationKeyBuffer); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 88bf829999..51ad381549 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -35,6 +35,7 @@ import java.util.Set; import com.google.common.annotations.VisibleForTesting; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.BlobStorageUtils; @@ -88,6 +89,7 @@ import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles; import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles.ConditionalResolverMergeFilesCtx; import org.apache.hadoop.hive.ql.plan.ConditionalWork; @@ -111,7 +113,7 @@ import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.SparkWork; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.TezWork; @@ -496,6 +498,10 @@ public static void setMapWork(MapWork plan, ParseContext parseCtx, Set currTask, HiveConf hconf) { MoveWork mvWork = mvTask.getWork(); - StatsWork statsWork = null; + BasicStatsWork statsWork = null; if (mvWork.getLoadTableWork() != null) { - statsWork = new StatsWork(mvWork.getLoadTableWork()); + statsWork = new BasicStatsWork(mvWork.getLoadTableWork()); } else if (mvWork.getLoadFileWork() != null) { - statsWork = new StatsWork(mvWork.getLoadFileWork()); + statsWork = new BasicStatsWork(mvWork.getLoadFileWork()); } assert statsWork != null : "Error when generating StatsTask"; @@ -1504,7 +1510,8 @@ public static void addStatsTask(FileSinkOperator nd, MoveTask mvTask, // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix // in FileSinkDesc is used for stats publishing. They should be consistent. statsWork.setAggKey(nd.getConf().getStatsAggPrefix()); - Task statsTask = TaskFactory.get(statsWork, hconf); + ColumnStatsWork columnStatsWork = new ColumnStatsWork(statsWork); + Task statsTask = TaskFactory.get(columnStatsWork, hconf); // subscribe feeds from the MoveTask so that MoveTask can forward the list // of dynamic partition list to the StatsTask diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java index 3a20cfe7ac..32dbc8cb15 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java @@ -35,10 +35,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.StatsTask; +import org.apache.hadoop.hive.ql.exec.BasicStatsTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.tez.DagUtils; import org.apache.hadoop.hive.ql.exec.tez.TezTask; @@ -92,8 +93,9 @@ public MemoryCalculator(PhysicalContext pctx) { public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) throws SemanticException { Task currTask = (Task) nd; - if (currTask instanceof StatsTask) { - currTask = ((StatsTask) currTask).getWork().getSourceTask(); + if (currTask instanceof ColumnStatsTask + && ((ColumnStatsTask) currTask).getWork().getBasicStatsWork() != null) { + currTask = ((ColumnStatsTask) currTask).getWork().getBasicStatsWork().getSourceTask(); } if (currTask instanceof TezTask) { TezWork work = ((TezTask) currTask).getWork(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SerializeFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SerializeFilter.java index dc433fed22..41bcbffe17 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SerializeFilter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SerializeFilter.java @@ -26,8 +26,9 @@ import java.util.Set; import java.util.Stack; +import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; -import org.apache.hadoop.hive.ql.exec.StatsTask; +import org.apache.hadoop.hive.ql.exec.BasicStatsTask; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.tez.TezTask; @@ -71,8 +72,9 @@ public Serializer(PhysicalContext pctx) { public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) throws SemanticException { Task currTask = (Task) nd; - if (currTask instanceof StatsTask) { - currTask = ((StatsTask) currTask).getWork().getSourceTask(); + if (currTask instanceof ColumnStatsTask + && ((ColumnStatsTask) currTask).getWork().getBasicStatsWork() != null) { + currTask = ((ColumnStatsTask) currTask).getWork().getBasicStatsWork().getSourceTask(); } if (currTask instanceof TezTask) { TezWork work = ((TezTask) currTask).getWork(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index e9a4ff0748..ab424d257c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -91,6 +91,7 @@ import org.apache.hadoop.hive.ql.plan.CacheMetadataDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork; +import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; import org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc; import org.apache.hadoop.hive.ql.plan.CreateIndexDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; @@ -130,7 +131,7 @@ import org.apache.hadoop.hive.ql.plan.ShowTablesDesc; import org.apache.hadoop.hive.ql.plan.ShowTblPropertiesDesc; import org.apache.hadoop.hive.ql.plan.ShowTxnsDesc; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.SwitchDatabaseDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TruncateTableDesc; @@ -1068,18 +1069,19 @@ private void analyzeTruncateTable(ASTNode ast) throws SemanticException { // Recalculate the HDFS stats if auto gather stats is set if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { - StatsWork statDesc; + BasicStatsWork basicStatsWork; if (oldTblPartLoc.equals(newTblPartLoc)) { // If we're merging to the same location, we can avoid some metastore calls TableSpec tablepart = new TableSpec(this.db, conf, root); - statDesc = new StatsWork(tablepart); + basicStatsWork = new BasicStatsWork(tablepart); } else { - statDesc = new StatsWork(ltd); + basicStatsWork = new BasicStatsWork(ltd); } - statDesc.setNoStatsAggregator(true); - statDesc.setClearAggregatorStats(true); - statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - Task statTask = TaskFactory.get(statDesc, conf); + basicStatsWork.setNoStatsAggregator(true); + basicStatsWork.setClearAggregatorStats(true); + basicStatsWork.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + ColumnStatsWork columnStatsWork = new ColumnStatsWork(basicStatsWork); + Task statTask = TaskFactory.get(columnStatsWork, conf); moveTsk.addDependentTask(statTask); } } catch (HiveException e) { @@ -1683,18 +1685,19 @@ private void analyzeAlterTablePartMergeFiles(ASTNode ast, mergeTask.addDependentTask(moveTsk); if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { - StatsWork statDesc; + BasicStatsWork basicStatsWork; if (oldTblPartLoc.equals(newTblPartLoc)) { // If we're merging to the same location, we can avoid some metastore calls TableSpec tablepart = new TableSpec(db, conf, tableName, partSpec); - statDesc = new StatsWork(tablepart); + basicStatsWork = new BasicStatsWork(tablepart); } else { - statDesc = new StatsWork(ltd); + basicStatsWork = new BasicStatsWork(ltd); } - statDesc.setNoStatsAggregator(true); - statDesc.setClearAggregatorStats(true); - statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - Task statTask = TaskFactory.get(statDesc, conf); + basicStatsWork.setNoStatsAggregator(true); + basicStatsWork.setClearAggregatorStats(true); + basicStatsWork.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + ColumnStatsWork columnStatsWork = new ColumnStatsWork(basicStatsWork); + Task statTask = TaskFactory.get(columnStatsWork, conf); moveTsk.addDependentTask(statTask); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 7a0d4a752e..fbef0dd837 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -40,7 +40,7 @@ import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.ExplainTask; import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.StatsTask; +import org.apache.hadoop.hive.ql.exec.BasicStatsTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index a9c1e61ba9..e493cdf656 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -205,10 +205,6 @@ protected void setupMapWork(MapWork mapWork, GenTezProcContext context, // All the setup is done in GenMapRedUtils GenMapRedUtils.setMapWork(mapWork, context.parseContext, context.inputs, partitions, root, alias, context.conf, false); - // we also collect table stats while collecting column stats. - if (context.parseContext.getAnalyzeRewrite() != null) { - mapWork.setGatheringStats(true); - } } // removes any union operator and clones the plan diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index bda94ff765..8506bc54ef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.parse; import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; - import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import java.io.IOException; @@ -50,9 +49,10 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MoveWork; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.mapred.InputFormat; import com.google.common.collect.Lists; @@ -296,11 +296,12 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { // Update the stats which do not require a complete scan. Task statTask = null; if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { - StatsWork statDesc = new StatsWork(loadTableWork); - statDesc.setNoStatsAggregator(true); - statDesc.setClearAggregatorStats(true); - statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - statTask = TaskFactory.get(statDesc, conf); + BasicStatsWork basicStatsWork = new BasicStatsWork(loadTableWork); + basicStatsWork.setNoStatsAggregator(true); + basicStatsWork.setClearAggregatorStats(true); + basicStatsWork.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + ColumnStatsWork columnStatsWork = new ColumnStatsWork(basicStatsWork); + statTask = TaskFactory.get(columnStatsWork, conf); } // HIVE-3334 has been filed for load file with index auto update diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java index b6d7ee8a92..4e787ccd1e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java @@ -44,9 +44,10 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; +import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.StatsNoJobWork; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.mapred.InputFormat; @@ -103,7 +104,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // There will not be any Tez job above this task - StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata() + BasicStatsNoJobWork snjWork = new BasicStatsNoJobWork(tableScan.getConf().getTableMetadata() .getTableSpec()); snjWork.setStatsReliable(parseContext.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); @@ -116,7 +117,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, false); snjWork.setPrunedPartitionList(partList); } - Task snjTask = TaskFactory.get(snjWork, parseContext.getConf()); + Task snjTask = TaskFactory.get(snjWork, parseContext.getConf()); snjTask.setParentTasks(null); context.rootTasks.remove(context.currentTask); context.rootTasks.add(snjTask); @@ -127,27 +128,28 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, // The plan consists of a simple TezTask followed by a StatsTask. // The Tez task is just a simple TableScanOperator - StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); - statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); - statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); - statsWork.setSourceTask(context.currentTask); - statsWork.setStatsReliable(parseContext.getConf().getBoolVar( + BasicStatsWork basicStatsWork = new BasicStatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); + basicStatsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); + basicStatsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); + basicStatsWork.setSourceTask(context.currentTask); + basicStatsWork.setStatsReliable(parseContext.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - Task statsTask = TaskFactory.get(statsWork, parseContext.getConf()); + ColumnStatsWork columnStatsWork = new ColumnStatsWork(basicStatsWork); + Task statsTask = TaskFactory.get(columnStatsWork, parseContext.getConf()); context.currentTask.addDependentTask(statsTask); // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // The plan consists of a StatsTask only. if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) { statsTask.setParentTasks(null); - statsWork.setNoScanAnalyzeCommand(true); + columnStatsWork.getBasicStatsWork().setNoScanAnalyzeCommand(true); context.rootTasks.remove(context.currentTask); context.rootTasks.add(statsTask); } // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) { - handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask); + handlePartialScanCommand(tableScan, parseContext, basicStatsWork, context, statsTask); } // NOTE: here we should use the new partition predicate pushdown API to @@ -166,65 +168,18 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, return true; } - } else if (parseContext.getAnalyzeRewrite() != null) { - // we need to collect table stats while collecting column stats. - try { - context.currentTask.addDependentTask(genTableStats(context, tableScan)); - } catch (HiveException e) { - throw new SemanticException(e); - } } return null; } - private Task genTableStats(GenTezProcContext context, TableScanOperator tableScan) - throws HiveException { - Class inputFormat = tableScan.getConf().getTableMetadata() - .getInputFormatClass(); - ParseContext parseContext = context.parseContext; - Table table = tableScan.getConf().getTableMetadata(); - List partitions = new ArrayList<>(); - if (table.isPartitioned()) { - partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions()); - for (Partition partn : partitions) { - LOG.debug("XXX: adding part: " + partn); - context.outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK)); - } - } - TableSpec tableSpec = new TableSpec(table, partitions); - tableScan.getConf().getTableMetadata().setTableSpec(tableSpec); - - if (inputFormat.equals(OrcInputFormat.class)) { - // For ORC, there is no Tez Job for table stats. - StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata() - .getTableSpec()); - snjWork.setStatsReliable(parseContext.getConf().getBoolVar( - HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - // If partition is specified, get pruned partition list - if (partitions.size() > 0) { - snjWork.setPrunedPartitionList(parseContext.getPrunedPartitions(tableScan)); - } - return TaskFactory.get(snjWork, parseContext.getConf()); - } else { - - StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); - statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); - statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); - statsWork.setSourceTask(context.currentTask); - statsWork.setStatsReliable(parseContext.getConf().getBoolVar( - HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - return TaskFactory.get(statsWork, parseContext.getConf()); - } - } - /** * handle partial scan command. * * It is composed of PartialScanTask followed by StatsTask. */ private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext, - StatsWork statsWork, GenTezProcContext context, Task statsTask) + BasicStatsWork statsWork, GenTezProcContext context, Task statsTask) throws SemanticException { String aggregationKey = tableScan.getConf().getStatsAggPrefix(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 9e84a29470..07c69cc9b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -10439,10 +10439,7 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String throws SemanticException { // if it is not analyze command and not column stats, then do not gatherstats - // if it is column stats, but it is not tez, do not gatherstats - if ((!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) - || (qbp.getAnalyzeRewrite() != null && !HiveConf.getVar(conf, - HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez"))) { + if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) { tsDesc.setGatherStats(false); } else { if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index 08a8f00e06..5aafc88403 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -21,11 +21,13 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.Stack; @@ -35,6 +37,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.Context; @@ -43,7 +46,8 @@ import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.StatsTask; +import org.apache.hadoop.hive.ql.exec.BasicStatsTask; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -51,11 +55,17 @@ import org.apache.hadoop.hive.ql.exec.spark.SparkTask; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; +import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; @@ -76,6 +86,7 @@ import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.thrift.ThriftFormatter; import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe; +import org.apache.hadoop.mapred.InputFormat; import com.google.common.collect.Interner; import com.google.common.collect.Interners; @@ -294,18 +305,44 @@ public void compile(final ParseContext pCtx, final List> leafTasks = new LinkedHashSet>(); - getLeafTasks(rootTasks, leafTasks); + // map from tablename to task (ColumnStatsTask which includes a BasicStatsTask) + Map map = new HashMap<>(); if (isCStats) { - genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, leafTasks, outerQueryLimit, 0); + if (rootTasks == null || rootTasks.size() != 1 || pCtx.getTopOps() == null + || pCtx.getTopOps().size() != 1) { + throw new SemanticException("Can not find correct root task!"); + } + try { + Task root = rootTasks.iterator().next(); + ColumnStatsTask tsk = (ColumnStatsTask) genTableStats(pCtx, pCtx.getTopOps().values() + .iterator().next(), root, outputs); + root.addDependentTask(tsk); + map.put(extractTableFullName((ColumnStatsTask) tsk), (ColumnStatsTask) tsk); + } catch (HiveException e) { + throw new SemanticException(e); + } + genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, map, outerQueryLimit, 0); } else { + Set> leafTasks = new LinkedHashSet>(); + getLeafTasks(rootTasks, leafTasks); + for (Task tsk : leafTasks) { + // find the correct ColumnStatsTask + if (tsk instanceof ColumnStatsTask) { + map.put(extractTableFullName((ColumnStatsTask) tsk), (ColumnStatsTask) tsk); + } + } for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx .getColumnStatsAutoGatherContexts()) { if (!columnStatsAutoGatherContext.isInsertInto()) { genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), - columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, 0); + columnStatsAutoGatherContext.getLoadFileWork(), map, outerQueryLimit, 0); } else { int numBitVector; try { @@ -314,7 +351,7 @@ public void compile(final ParseContext pCtx, final List genTableStats(ParseContext parseContext, TableScanOperator tableScan, Task currentTask, final HashSet outputs) + throws HiveException { + Class inputFormat = tableScan.getConf().getTableMetadata() + .getInputFormatClass(); + Table table = tableScan.getConf().getTableMetadata(); + List partitions = new ArrayList<>(); + if (table.isPartitioned()) { + partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions()); + for (Partition partn : partitions) { + LOG.debug("XXX: adding part: " + partn); + outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK)); + } + } + TableSpec tableSpec = new TableSpec(table, partitions); + tableScan.getConf().getTableMetadata().setTableSpec(tableSpec); + + if (inputFormat.equals(OrcInputFormat.class)) { + // For ORC, there is no Tez Job for table stats. + BasicStatsNoJobWork snjWork = new BasicStatsNoJobWork(tableScan.getConf().getTableMetadata() + .getTableSpec()); + snjWork.setStatsReliable(parseContext.getConf().getBoolVar( + HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + // If partition is specified, get pruned partition list + if (partitions.size() > 0) { + snjWork.setPrunedPartitionList(parseContext.getPrunedPartitions(tableScan)); + } + ColumnStatsWork columnStatsWork = new ColumnStatsWork(snjWork); + return TaskFactory.get(columnStatsWork, parseContext.getConf()); + } else { + BasicStatsWork statsWork = new BasicStatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); + statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); + statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); + statsWork.setSourceTask(currentTask); + statsWork.setStatsReliable(parseContext.getConf().getBoolVar( + HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + ColumnStatsWork columnStatsWork = new ColumnStatsWork(statsWork); + return TaskFactory.get(columnStatsWork, parseContext.getConf()); + } + } + private void patchUpAfterCTASorMaterializedView(final List> rootTasks, final HashSet outputs, Task createTask) { @@ -388,7 +486,8 @@ private void patchUpAfterCTASorMaterializedView(final List 0); for (Task task : leaves) { - if (task instanceof StatsTask) { + if (task instanceof ColumnStatsTask + && ((ColumnStatsTask) task).getWork().getBasicStatsWork() != null) { // StatsTask require table to already exist for (Task parentOfStatsTask : task.getParentTasks()) { parentOfStatsTask.addDependentTask(createTask); @@ -416,13 +515,12 @@ private void patchUpAfterCTASorMaterializedView(final List loadFileWork, Set> leafTasks, - int outerQueryLimit, int numBitVector) { - ColumnStatsTask cStatsTask = null; - ColumnStatsWork cStatsWork = null; + List loadFileWork, Map map, + int outerQueryLimit, int numBitVector) throws SemanticException { FetchWork fetch = null; String tableName = analyzeRewrite.getTableName(); List colName = analyzeRewrite.getColName(); @@ -450,10 +548,12 @@ protected void genColumnStatsTask(AnalyzeRewriteContext analyzeRewrite, ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, colName, colType, isTblLevel, numBitVector); - cStatsWork = new ColumnStatsWork(fetch, cStatsDesc); - cStatsTask = (ColumnStatsTask) TaskFactory.get(cStatsWork, conf); - for (Task tsk : leafTasks) { - tsk.addDependentTask(cStatsTask); + ColumnStatsTask columnStatsTask = map.get(tableName); + if (columnStatsTask == null) { + throw new SemanticException("Can not find " + tableName + " in genColumnStatsTask"); + } else { + columnStatsTask.getWork().setfWork(fetch); + columnStatsTask.getWork().setColStats(cStatsDesc); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java index 52af3af2ea..f1b2eee42e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java @@ -42,10 +42,11 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.SparkWork; -import org.apache.hadoop.hive.ql.plan.StatsNoJobWork; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.mapred.InputFormat; import com.google.common.base.Preconditions; @@ -106,10 +107,10 @@ public Object process(Node nd, Stack stack, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // There will not be any Spark job above this task - StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec()); + BasicStatsNoJobWork snjWork = new BasicStatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec()); snjWork.setStatsReliable(parseContext.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - Task snjTask = TaskFactory.get(snjWork, parseContext.getConf()); + Task snjTask = TaskFactory.get(snjWork, parseContext.getConf()); snjTask.setParentTasks(null); context.rootTasks.remove(context.currentTask); context.rootTasks.add(snjTask); @@ -120,26 +121,27 @@ public Object process(Node nd, Stack stack, // The plan consists of a simple SparkTask followed by a StatsTask. // The Spark task is just a simple TableScanOperator - StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); - statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); - statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); - statsWork.setSourceTask(context.currentTask); - statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - Task statsTask = TaskFactory.get(statsWork, parseContext.getConf()); + BasicStatsWork basicStatsWork = new BasicStatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); + basicStatsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); + basicStatsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); + basicStatsWork.setSourceTask(context.currentTask); + basicStatsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + ColumnStatsWork columnStatsWork = new ColumnStatsWork(basicStatsWork); + Task statsTask = TaskFactory.get(columnStatsWork, parseContext.getConf()); context.currentTask.addDependentTask(statsTask); // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // The plan consists of a StatsTask only. if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) { statsTask.setParentTasks(null); - statsWork.setNoScanAnalyzeCommand(true); + basicStatsWork.setNoScanAnalyzeCommand(true); context.rootTasks.remove(context.currentTask); context.rootTasks.add(statsTask); } // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) { - handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask); + handlePartialScanCommand(tableScan, parseContext, basicStatsWork, context, statsTask); } // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list, @@ -167,7 +169,7 @@ public Object process(Node nd, Stack stack, * It is composed of PartialScanTask followed by StatsTask. */ private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext, - StatsWork statsWork, GenSparkProcContext context, Task statsTask) + BasicStatsWork statsWork, GenSparkProcContext context, Task statsTask) throws SemanticException { String aggregationKey = tableScan.getConf().getStatsAggPrefix(); StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsNoJobWork.java similarity index 81% rename from ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java rename to ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsNoJobWork.java index 77c04f6c6e..8c8c8f6aaf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsNoJobWork.java @@ -29,22 +29,23 @@ /** * Client-side stats aggregator task. */ -@Explain(displayName = "Stats-Aggr Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) -public class StatsNoJobWork implements Serializable { +public class BasicStatsNoJobWork implements Serializable { private static final long serialVersionUID = 1L; private TableSpec tableSpecs; private boolean statsReliable; private PrunedPartitionList prunedPartitionList; - public StatsNoJobWork() { + boolean clearColumnStats; + + public BasicStatsNoJobWork() { } - public StatsNoJobWork(TableSpec tableSpecs) { + public BasicStatsNoJobWork(TableSpec tableSpecs) { this.tableSpecs = tableSpecs; } - public StatsNoJobWork(boolean statsReliable) { + public BasicStatsNoJobWork(boolean statsReliable) { this.statsReliable = statsReliable; } @@ -67,4 +68,12 @@ public void setPrunedPartitionList(PrunedPartitionList prunedPartitionList) { public PrunedPartitionList getPrunedPartitionList() { return prunedPartitionList; } + + public boolean isClearColumnStats() { + return clearColumnStats; + } + + public void setClearColumnStats(boolean clearColumnStats) { + this.clearColumnStats = clearColumnStats; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsWork.java similarity index 91% rename from ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java rename to ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsWork.java index a5050c5368..4c3d9bd336 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsWork.java @@ -29,8 +29,7 @@ * ConditionalStats. * */ -@Explain(displayName = "Stats-Aggr Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) -public class StatsWork implements Serializable { +public class BasicStatsWork implements Serializable { private static final long serialVersionUID = 1L; private TableSpec tableSpecs; // source table spec -- for TableScanOperator @@ -63,19 +62,21 @@ // used by FS based stats collector private String statsTmpDir; + + boolean clearColumnStats; - public StatsWork() { + public BasicStatsWork() { } - public StatsWork(TableSpec tableSpecs) { + public BasicStatsWork(TableSpec tableSpecs) { this.tableSpecs = tableSpecs; } - public StatsWork(LoadTableDesc loadTableDesc) { + public BasicStatsWork(LoadTableDesc loadTableDesc) { this.loadTableDesc = loadTableDesc; } - public StatsWork(LoadFileDesc loadFileDesc) { + public BasicStatsWork(LoadFileDesc loadFileDesc) { this.loadFileDesc = loadFileDesc; } @@ -167,4 +168,12 @@ public Task getSourceTask() { public void setSourceTask(Task sourceTask) { this.sourceTask = sourceTask; } + + public boolean isClearColumnStats() { + return clearColumnStats; + } + + public void setClearColumnStats(boolean clearColumnStats) { + this.clearColumnStats = clearColumnStats; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java index 97f323f4b7..a756a29d8b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java @@ -30,6 +30,7 @@ private static final long serialVersionUID = 1L; private boolean isTblLevel; private int numBitVector; + private boolean needMerge; private String tableName; private List colName; private List colType; @@ -44,6 +45,7 @@ public ColumnStatsDesc(String tableName, List colName, List colT this.colType = colType; this.isTblLevel = isTblLevel; this.numBitVector = 0; + this.needMerge = false; } public ColumnStatsDesc(String tableName, List colName, @@ -53,6 +55,7 @@ public ColumnStatsDesc(String tableName, List colName, this.colType = colType; this.isTblLevel = isTblLevel; this.numBitVector = numBitVector; + this.needMerge = this.numBitVector != 0; } @Explain(displayName = "Table") @@ -99,4 +102,8 @@ public void setNumBitVector(int numBitVector) { this.numBitVector = numBitVector; } + public boolean isNeedMerge() { + return needMerge; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java index 76811b1a93..61cebb857d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java @@ -24,22 +24,34 @@ import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.plan.Explain.Level; - /** * ColumnStats Work. * */ -@Explain(displayName = "Column Stats Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Column Stats Work", explainLevels = { Level.USER, Level.DEFAULT, + Level.EXTENDED }) public class ColumnStatsWork implements Serializable { private static final long serialVersionUID = 1L; + // this is for basic stats + private BasicStatsWork basicStatsWork; + private BasicStatsNoJobWork basicStatsNoJobWork; private FetchWork fWork; private ColumnStatsDesc colStats; private static final int LIMIT = -1; - public ColumnStatsWork() { } + public ColumnStatsWork(BasicStatsWork basicStatsWork) { + super(); + this.basicStatsWork = basicStatsWork; + } + + public ColumnStatsWork(BasicStatsNoJobWork basicStatsNoJobWork) { + super(); + this.basicStatsNoJobWork = basicStatsNoJobWork; + } + public ColumnStatsWork(FetchWork work, ColumnStatsDesc colStats) { this.fWork = work; this.setColStats(colStats); @@ -85,4 +97,22 @@ public static int getLimit() { return LIMIT; } + @Explain(displayName = "Basic Stats Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + public BasicStatsWork getBasicStatsWork() { + return basicStatsWork; + } + + public void setBasicStatsWork(BasicStatsWork basicStatsWork) { + this.basicStatsWork = basicStatsWork; + } + + @Explain(displayName = "Basic Stats NoJob Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + public BasicStatsNoJobWork getBasicStatsNoJobWork() { + return basicStatsNoJobWork; + } + + public void setBasicStatsNoJobWork(BasicStatsNoJobWork basicStatsNoJobWork) { + this.basicStatsNoJobWork = basicStatsNoJobWork; + } + } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index 7c66955e14..fc9c5754d6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -109,6 +109,7 @@ public void setUp() throws Exception { .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false); TxnDbUtil.setConfValues(hiveConf); TxnDbUtil.prepDb(); File f = new File(TEST_WAREHOUSE_DIR); diff --git a/ql/src/test/queries/clientpositive/autoColumnStats_10.q b/ql/src/test/queries/clientpositive/autoColumnStats_10.q new file mode 100644 index 0000000000..bf166d8701 --- /dev/null +++ b/ql/src/test/queries/clientpositive/autoColumnStats_10.q @@ -0,0 +1,52 @@ +set hive.mapred.mode=nonstrict; +set hive.stats.column.autogather=true; + +drop table p; + +CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint); + +desc formatted p; + +insert into p values (1,22,333); + +desc formatted p; + +alter table p replace columns (insert_num int, c1 STRING, c2 STRING); + +desc formatted p; + +desc formatted p insert_num; +desc formatted p c1; + +insert into p values (2,11,111); + +desc formatted p; + +desc formatted p insert_num; +desc formatted p c1; + +set hive.stats.column.autogather=false; + +drop table p; + +CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint); + +desc formatted p; + +insert into p values (1,22,333); + +desc formatted p; + +alter table p replace columns (insert_num int, c1 STRING, c2 STRING); + +desc formatted p; + +desc formatted p insert_num; +desc formatted p c1; + +insert into p values (2,11,111); + +desc formatted p; + +desc formatted p insert_num; +desc formatted p c1; diff --git a/ql/src/test/queries/clientpositive/bucket_num_reducers.q b/ql/src/test/queries/clientpositive/bucket_num_reducers.q index 06f334e833..5c5008eea7 100644 --- a/ql/src/test/queries/clientpositive/bucket_num_reducers.q +++ b/ql/src/test/queries/clientpositive/bucket_num_reducers.q @@ -1,4 +1,4 @@ -; +set hive.stats.column.autogather=false; set hive.exec.mode.local.auto=false; set mapred.reduce.tasks = 10; diff --git a/ql/src/test/queries/clientpositive/combine1.q b/ql/src/test/queries/clientpositive/combine1.q index 3bcb8b19c1..b300830884 100644 --- a/ql/src/test/queries/clientpositive/combine1.q +++ b/ql/src/test/queries/clientpositive/combine1.q @@ -7,6 +7,8 @@ set mapred.max.split.size=256; set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; +set hive.stats.column.autogather=false; + -- SORT_QUERY_RESULTS create table combine1_1(key string, value string) stored as textfile; diff --git a/ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q b/ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q index 4dcea1f7ce..7159ad5995 100644 --- a/ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q +++ b/ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; --SORT_QUERY_RESULTS -- Java JCE must be installed in order to hava a key length of 256 bits diff --git a/ql/src/test/queries/clientpositive/groupby1.q b/ql/src/test/queries/clientpositive/groupby1.q index a8c9a8dcf8..cd3a12b44e 100755 --- a/ql/src/test/queries/clientpositive/groupby1.q +++ b/ql/src/test/queries/clientpositive/groupby1.q @@ -1,3 +1,5 @@ +-- due to testMTQueries1 +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.map.aggr=false; diff --git a/ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q b/ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q index 6824c1c032..c0ddb8bce6 100644 --- a/ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q +++ b/ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.exec.infer.bucket.sort=true; set hive.exec.infer.bucket.sort.num.buckets.power.two=true; diff --git a/ql/src/test/queries/clientpositive/join2.q b/ql/src/test/queries/clientpositive/join2.q index 8aedd561e2..c3c7c241e9 100644 --- a/ql/src/test/queries/clientpositive/join2.q +++ b/ql/src/test/queries/clientpositive/join2.q @@ -1,3 +1,5 @@ +-- due to testMTQueries1 +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS diff --git a/ql/src/test/queries/clientpositive/orc_wide_table.q b/ql/src/test/queries/clientpositive/orc_wide_table.q index 422a3c24b1..d2ec3857d0 100644 --- a/ql/src/test/queries/clientpositive/orc_wide_table.q +++ b/ql/src/test/queries/clientpositive/orc_wide_table.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.stats.column.autogather=false; drop table if exists test_txt; drop table if exists test_orc; create table test_txt( diff --git a/ql/src/test/queries/clientpositive/partition_coltype_literals.q b/ql/src/test/queries/clientpositive/partition_coltype_literals.q index eb56b1a93d..8da4876b70 100644 --- a/ql/src/test/queries/clientpositive/partition_coltype_literals.q +++ b/ql/src/test/queries/clientpositive/partition_coltype_literals.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.compute.query.using.stats=false; drop table if exists partcoltypenum; create table partcoltypenum (key int, value string) partitioned by (tint tinyint, sint smallint, bint bigint); diff --git a/ql/src/test/queries/clientpositive/smb_join_partition_key.q b/ql/src/test/queries/clientpositive/smb_join_partition_key.q index 160bf5e36a..b8a8323a4e 100644 --- a/ql/src/test/queries/clientpositive/smb_join_partition_key.q +++ b/ql/src/test/queries/clientpositive/smb_join_partition_key.q @@ -1,3 +1,5 @@ +--because p1 is decimal +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; SET hive.enforce.sortmergebucketmapjoin=false; SET hive.auto.convert.sortmerge.join=true; diff --git a/ql/src/test/queries/clientpositive/udf_round_2.q b/ql/src/test/queries/clientpositive/udf_round_2.q index 43988c1225..38885a97d4 100644 --- a/ql/src/test/queries/clientpositive/udf_round_2.q +++ b/ql/src/test/queries/clientpositive/udf_round_2.q @@ -1,4 +1,5 @@ set hive.fetch.task.conversion=more; +set hive.stats.column.autogather=false; -- test for NaN (not-a-number) create table tstTbl1(n double); diff --git a/ql/src/test/results/clientnegative/fileformat_void_input.q.out b/ql/src/test/results/clientnegative/fileformat_void_input.q.out index 6043258506..538e8f4cc5 100644 --- a/ql/src/test/results/clientnegative/fileformat_void_input.q.out +++ b/ql/src/test/results/clientnegative/fileformat_void_input.q.out @@ -10,16 +10,4 @@ POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -FAILED: SemanticException 3:20 Input format must implement InputFormat. Error encountered near token 'dest1' +FAILED: SemanticException 1:70 Input format must implement InputFormat. Error encountered near token 'dest1' diff --git a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out index 12dcc11794..fc61bc6f6d 100644 --- a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out +++ b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out @@ -79,7 +79,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -178,7 +178,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -277,7 +277,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -376,7 +376,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -475,7 +475,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -574,7 +574,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -673,7 +673,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -772,7 +772,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -871,7 +871,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out index 944482c6d5..5b9ba69e9c 100644 --- a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out +++ b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out @@ -88,7 +88,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -145,7 +145,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 12 numRows 500 rawDataSize 5312 @@ -275,7 +275,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 12 numRows 500 rawDataSize 5312 @@ -364,7 +364,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} #### A masked pattern was here #### numFiles 12 numRows 500 diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out index 426a4de206..61731254dc 100644 --- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -232,7 +232,7 @@ STAGE PLANS: dt 100 ts 3.0 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -278,7 +278,7 @@ STAGE PLANS: dt 100 ts 6.30 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -469,7 +469,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -538,7 +538,7 @@ STAGE PLANS: partcol1 2 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol diff --git a/ql/src/test/results/clientpositive/alter_table_add_partition.q.out b/ql/src/test/results/clientpositive/alter_table_add_partition.q.out index c96e5b43b7..3a3b38d92a 100644 --- a/ql/src/test/results/clientpositive/alter_table_add_partition.q.out +++ b/ql/src/test/results/clientpositive/alter_table_add_partition.q.out @@ -199,7 +199,7 @@ Database: default Table: mp #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}} numFiles 1 numRows 1 rawDataSize 1 diff --git a/ql/src/test/results/clientpositive/alter_table_serde2.q.out b/ql/src/test/results/clientpositive/alter_table_serde2.q.out index ae0ef54755..324859d664 100644 --- a/ql/src/test/results/clientpositive/alter_table_serde2.q.out +++ b/ql/src/test/results/clientpositive/alter_table_serde2.q.out @@ -79,7 +79,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -179,7 +179,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out b/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out index 2bfc04a142..3a47edb9cb 100644 --- a/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out +++ b/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out @@ -102,7 +102,7 @@ STAGE PLANS: partition values: age 15 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name @@ -147,7 +147,7 @@ STAGE PLANS: partition values: age 30 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name @@ -192,7 +192,7 @@ STAGE PLANS: partition values: age 40 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name @@ -237,7 +237,7 @@ STAGE PLANS: partition values: age __HIVE_DEFAULT_PARTITION__ properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out index e22c3ef0fc..bcb1dd27cb 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -66,11 +66,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select * from loc_orc where state='OH' @@ -87,17 +87,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (state = 'OH') (type: boolean) - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'OH' (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index a8e4854a00..150e7f8e91 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -66,11 +66,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc compute statistics for columns state @@ -106,22 +106,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -129,13 +129,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1) keys: _col0 (type: string), _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -151,7 +151,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) Reduce Operator Tree: Group By Operator @@ -159,10 +159,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -743,30 +743,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/auto_join1.q.out b/ql/src/test/results/clientpositive/auto_join1.q.out index 5f4bb7452a..dbd49a5f4a 100644 --- a/ql/src/test/results/clientpositive/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/auto_join1.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -44,7 +46,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join14.q.out b/ql/src/test/results/clientpositive/auto_join14.q.out index 1dd677c3d6..affad1efad 100644 --- a/ql/src/test/results/clientpositive/auto_join14.q.out +++ b/ql/src/test/results/clientpositive/auto_join14.q.out @@ -15,13 +15,15 @@ FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src @@ -44,7 +46,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join17.q.out b/ql/src/test/results/clientpositive/auto_join17.q.out index d39c36eac3..87158b71d7 100644 --- a/ql/src/test/results/clientpositive/auto_join17.q.out +++ b/ql/src/test/results/clientpositive/auto_join17.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -44,7 +46,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join19.q.out b/ql/src/test/results/clientpositive/auto_join19.q.out index 3f70055d81..d492a033c7 100644 --- a/ql/src/test/results/clientpositive/auto_join19.q.out +++ b/ql/src/test/results/clientpositive/auto_join19.q.out @@ -17,13 +17,15 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') diff --git a/ql/src/test/results/clientpositive/auto_join19_inclause.q.out b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out index 3f70055d81..d492a033c7 100644 --- a/ql/src/test/results/clientpositive/auto_join19_inclause.q.out +++ b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out @@ -17,13 +17,15 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') diff --git a/ql/src/test/results/clientpositive/auto_join2.q.out b/ql/src/test/results/clientpositive/auto_join2.q.out index b17d344985..0c3fa26637 100644 --- a/ql/src/test/results/clientpositive/auto_join2.q.out +++ b/ql/src/test/results/clientpositive/auto_join2.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -62,7 +64,7 @@ STAGE PLANS: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -103,6 +105,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -119,6 +136,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join25.q.out b/ql/src/test/results/clientpositive/auto_join25.q.out index 534bdb6ff0..b8c712363c 100644 --- a/ql/src/test/results/clientpositive/auto_join25.q.out +++ b/ql/src/test/results/clientpositive/auto_join25.q.out @@ -18,8 +18,6 @@ PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') @@ -33,11 +31,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-6:CONDITIONAL -RUN: Stage-7:MAPREDLOCAL -RUN: Stage-1:MAPRED +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL +RUN: Stage-5:MAPRED RUN: Stage-0:MOVE +RUN: Stage-3:MAPRED RUN: Stage-2:STATS +RUN: Stage-10:COLUMNSTATS PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -64,8 +64,6 @@ PREHOOK: Input: default@src PREHOOK: Output: default@dest_j2 FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value POSTHOOK: type: QUERY @@ -73,14 +71,16 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j2 POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-11:CONDITIONAL -RUN: Stage-14:MAPREDLOCAL +RUN: Stage-12:CONDITIONAL +RUN: Stage-15:MAPREDLOCAL RUN: Stage-1:MAPRED -RUN: Stage-8:CONDITIONAL -RUN: Stage-12:MAPREDLOCAL -RUN: Stage-2:MAPRED +RUN: Stage-9:CONDITIONAL +RUN: Stage-13:MAPREDLOCAL +RUN: Stage-7:MAPRED RUN: Stage-0:MOVE +RUN: Stage-4:MAPRED RUN: Stage-3:STATS +RUN: Stage-17:COLUMNSTATS PREHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j2 @@ -105,8 +105,6 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest_j1 -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY @@ -114,11 +112,13 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-6:CONDITIONAL -RUN: Stage-7:MAPREDLOCAL -RUN: Stage-1:MAPRED +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL +RUN: Stage-5:MAPRED RUN: Stage-0:MOVE +RUN: Stage-3:MAPRED RUN: Stage-2:STATS +RUN: Stage-10:COLUMNSTATS PREHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/auto_join26.q.out b/ql/src/test/results/clientpositive/auto_join26.q.out index b05145d871..50340cd2bd 100644 --- a/ql/src/test/results/clientpositive/auto_join26.q.out +++ b/ql/src/test/results/clientpositive/auto_join26.q.out @@ -15,13 +15,15 @@ INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_0:x @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join3.q.out b/ql/src/test/results/clientpositive/auto_join3.q.out index 35e8273766..d230ae9f54 100644 --- a/ql/src/test/results/clientpositive/auto_join3.q.out +++ b/ql/src/test/results/clientpositive/auto_join3.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -64,7 +66,7 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -99,6 +101,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -115,6 +132,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join4.q.out b/ql/src/test/results/clientpositive/auto_join4.q.out index d4fb977d7c..13ccdc1f78 100644 --- a/ql/src/test/results/clientpositive/auto_join4.q.out +++ b/ql/src/test/results/clientpositive/auto_join4.q.out @@ -37,13 +37,15 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -66,7 +68,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -99,6 +101,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -115,6 +132,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/auto_join5.q.out b/ql/src/test/results/clientpositive/auto_join5.q.out index c3b562de99..b781026673 100644 --- a/ql/src/test/results/clientpositive/auto_join5.q.out +++ b/ql/src/test/results/clientpositive/auto_join5.q.out @@ -37,13 +37,15 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -66,7 +68,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -99,6 +101,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -115,6 +132,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/auto_join6.q.out b/ql/src/test/results/clientpositive/auto_join6.q.out index 53caf7d25e..d661908659 100644 --- a/ql/src/test/results/clientpositive/auto_join6.q.out +++ b/ql/src/test/results/clientpositive/auto_join6.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/auto_join7.q.out b/ql/src/test/results/clientpositive/auto_join7.q.out index a657c301ee..599636e8ac 100644 --- a/ql/src/test/results/clientpositive/auto_join7.q.out +++ b/ql/src/test/results/clientpositive/auto_join7.q.out @@ -50,6 +50,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -126,6 +128,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -140,6 +157,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/auto_join8.q.out b/ql/src/test/results/clientpositive/auto_join8.q.out index 5ca6798f29..d500e796fa 100644 --- a/ql/src/test/results/clientpositive/auto_join8.q.out +++ b/ql/src/test/results/clientpositive/auto_join8.q.out @@ -37,13 +37,15 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -66,7 +68,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -102,6 +104,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -118,6 +135,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/auto_join9.q.out b/ql/src/test/results/clientpositive/auto_join9.q.out index d7d7d181f0..6474ef0d31 100644 --- a/ql/src/test/results/clientpositive/auto_join9.q.out +++ b/ql/src/test/results/clientpositive/auto_join9.q.out @@ -15,13 +15,15 @@ FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -44,7 +46,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out index 156be41502..a7508c287b 100644 --- a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out +++ b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out @@ -141,7 +141,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -162,7 +162,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -182,7 +182,7 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_0:orderpayment, $hdt$_1:dim_pay_date] + /orderpayment_small [$hdt$_1:orderpayment, $hdt$_2:dim_pay_date] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -277,7 +277,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -298,7 +298,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -318,7 +318,7 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_2:deal] + /orderpayment_small [$hdt$_3:deal] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: @@ -414,7 +414,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -435,7 +435,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -455,7 +455,7 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_3:order_city] + /orderpayment_small [$hdt$_4:order_city] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: @@ -551,7 +551,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid @@ -572,7 +572,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid @@ -592,7 +592,7 @@ STAGE PLANS: name: default.user_small name: default.user_small Truncated Path -> Alias: - /user_small [$hdt$_4:user] + /user_small [$hdt$_0:user] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out index 8c7658c447..eda66193cf 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out @@ -70,8 +70,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -104,6 +107,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -114,6 +127,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -128,6 +165,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -141,6 +192,25 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 FROM tbl1 a JOIN tbl2 b @@ -247,8 +317,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -281,6 +354,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -291,6 +374,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -305,6 +412,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -318,6 +439,25 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 FROM tbl1 a JOIN tbl2 b @@ -424,8 +564,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -458,6 +601,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -468,6 +621,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -482,6 +659,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -495,6 +686,25 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 FROM tbl1 a JOIN tbl2 b diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out index b53e6704cc..9498f7562e 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out @@ -14,24 +14,6 @@ POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table2 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table1@ds=1 -PREHOOK: Output: default@test_table2@ds=1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table1@ds=1 -POSTHOOK: Output: default@test_table2@ds=1 -POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) INTO 16 BUCKETS PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -40,16 +22,12 @@ POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table3 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' -POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -93,7 +71,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -172,6 +150,32 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -205,22 +209,86 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1 -PREHOOK: Input: default@test_table1@ds=1 -PREHOOK: Input: default@test_table2 -PREHOOK: Input: default@test_table2@ds=1 -PREHOOK: Output: default@test_table3@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1 -POSTHOOK: Input: default@test_table1@ds=1 -POSTHOOK: Input: default@test_table2 -POSTHOOK: Input: default@test_table2@ds=1 -POSTHOOK: Output: default@test_table3@ds=1 -POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: smb_mapjoin_11.test_table3 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: SELECT * FROM test_table1 ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -1827,7 +1895,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -1936,7 +2004,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out index 9928a60095..bdf550f4b6 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out @@ -14,36 +14,6 @@ POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table2 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3') SELECT * -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table1@ds=1 -PREHOOK: Output: default@test_table2@ds=1 -PREHOOK: Output: default@test_table2@ds=2 -PREHOOK: Output: default@test_table2@ds=3 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3') SELECT * -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table1@ds=1 -POSTHOOK: Output: default@test_table2@ds=1 -POSTHOOK: Output: default@test_table2@ds=2 -POSTHOOK: Output: default@test_table2@ds=3 -POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -52,16 +22,11 @@ POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table3 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' -POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -129,7 +94,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -207,26 +172,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1 -PREHOOK: Input: default@test_table1@ds=1 -PREHOOK: Input: default@test_table2 -PREHOOK: Input: default@test_table2@ds=1 -PREHOOK: Input: default@test_table2@ds=2 -PREHOOK: Input: default@test_table2@ds=3 -PREHOOK: Output: default@test_table3@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1 -POSTHOOK: Input: default@test_table1@ds=1 -POSTHOOK: Input: default@test_table2 -POSTHOOK: Input: default@test_table2@ds=1 -POSTHOOK: Input: default@test_table2@ds=2 -POSTHOOK: Input: default@test_table2@ds=3 -POSTHOOK: Output: default@test_table3@ds=1 -POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: smb_mapjoin_12.test_table3 + Is Table Level Stats: false + PREHOOK: query: SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -242,18 +195,11 @@ POSTHOOK: Input: default@test_table3 POSTHOOK: Input: default@test_table3@ds=1 #### A masked pattern was here #### 879 -PREHOOK: query: explain extended -INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') -SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') -SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' -POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -399,24 +345,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') -SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1 -PREHOOK: Input: default@test_table1@ds=1 -PREHOOK: Input: default@test_table3 -PREHOOK: Input: default@test_table3@ds=1 -PREHOOK: Output: default@test_table3@ds=2 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') -SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1 -POSTHOOK: Input: default@test_table1@ds=1 -POSTHOOK: Input: default@test_table3 -POSTHOOK: Input: default@test_table3@ds=1 -POSTHOOK: Output: default@test_table3@ds=2 -POSTHOOK: Lineage: test_table3 PARTITION(ds=2).key SIMPLE [(test_table3)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3 PARTITION(ds=2).value EXPRESSION [(test_table3)a.FieldSchema(name:value, type:string, comment:null), (test_table1)b.FieldSchema(name:value, type:string, comment:null), ] + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: smb_mapjoin_12.test_table3 + Is Table Level Stats: false + PREHOOK: query: SELECT count(*) from test_table3 tablesample (bucket 2 out of 16) a where ds = '2' PREHOOK: type: QUERY PREHOOK: Input: default@test_table3 diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out index 49ff6355b3..6cbb47375a 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out @@ -30,36 +30,6 @@ POSTHOOK: query: CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table4 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 SELECT * -INSERT OVERWRITE TABLE test_table2 SELECT * -INSERT OVERWRITE TABLE test_table3 SELECT * -INSERT OVERWRITE TABLE test_table4 SELECT * -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table1 -PREHOOK: Output: default@test_table2 -PREHOOK: Output: default@test_table3 -PREHOOK: Output: default@test_table4 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 SELECT * -INSERT OVERWRITE TABLE test_table2 SELECT * -INSERT OVERWRITE TABLE test_table3 SELECT * -INSERT OVERWRITE TABLE test_table4 SELECT * -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table1 -POSTHOOK: Output: default@test_table2 -POSTHOOK: Output: default@test_table3 -POSTHOOK: Output: default@test_table4 -POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2.value EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10 PREHOOK: type: QUERY @@ -112,7 +82,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -135,7 +105,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -292,7 +262,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -315,7 +285,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out index ab2b323ff2..d6ca14ec1a 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out @@ -14,24 +14,6 @@ POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table2 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 SELECT * -INSERT OVERWRITE TABLE test_table2 SELECT * -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table1 -PREHOOK: Output: default@test_table2 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 SELECT * -INSERT OVERWRITE TABLE test_table2 SELECT * -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table1 -POSTHOOK: Output: default@test_table2 -POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: EXPLAIN SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out index 82f5804eea..000ea34c67 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out @@ -54,50 +54,6 @@ POSTHOOK: query: load data local inpath '../../data/files/empty2.txt' into table POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket4_1 -PREHOOK: query: insert overwrite table smb_bucket4_2 -select * from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@smb_bucket4_2 -POSTHOOK: query: insert overwrite table smb_bucket4_2 -select * from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@smb_bucket4_2 -POSTHOOK: Lineage: smb_bucket4_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table smb_join_results_empty_bigtable -select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@smb_bucket4_1 -PREHOOK: Input: default@smb_bucket4_2 -PREHOOK: Output: default@smb_join_results_empty_bigtable -POSTHOOK: query: insert overwrite table smb_join_results_empty_bigtable -select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smb_bucket4_1 -POSTHOOK: Input: default@smb_bucket4_2 -POSTHOOK: Output: default@smb_join_results_empty_bigtable -POSTHOOK: Lineage: smb_join_results_empty_bigtable.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_join_results_empty_bigtable.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_join_results_empty_bigtable.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: smb_join_results_empty_bigtable.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: insert overwrite table smb_join_results_empty_bigtable -select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@smb_bucket4_1 -PREHOOK: Input: default@smb_bucket4_2 -PREHOOK: Output: default@smb_join_results_empty_bigtable -POSTHOOK: query: insert overwrite table smb_join_results_empty_bigtable -select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smb_bucket4_1 -POSTHOOK: Input: default@smb_bucket4_2 -POSTHOOK: Output: default@smb_join_results_empty_bigtable -POSTHOOK: Lineage: smb_join_results_empty_bigtable.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_join_results_empty_bigtable.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_join_results_empty_bigtable.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: smb_join_results_empty_bigtable.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: select * from smb_join_results_empty_bigtable order by k1, v1, k2, v2 PREHOOK: type: QUERY PREHOOK: Input: default@smb_join_results_empty_bigtable @@ -606,20 +562,13 @@ NULL NULL 497 val_497 NULL NULL 498 val_498 NULL NULL 498 val_498 NULL NULL 498 val_498 -PREHOOK: query: explain -insert overwrite table smb_join_results -select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -PREHOOK: type: QUERY -POSTHOOK: query: explain -insert overwrite table smb_join_results -select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -648,6 +597,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -671,6 +641,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: smb_mapjoin_7.smb_join_results + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -701,22 +678,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### -PREHOOK: query: insert overwrite table smb_join_results -select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@smb_bucket4_1 -PREHOOK: Input: default@smb_bucket4_2 -PREHOOK: Output: default@smb_join_results -POSTHOOK: query: insert overwrite table smb_join_results -select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smb_bucket4_1 -POSTHOOK: Input: default@smb_bucket4_2 -POSTHOOK: Output: default@smb_join_results -POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: select * from smb_join_results order by k1, v1, k2, v2 PREHOOK: type: QUERY PREHOOK: Input: default@smb_join_results @@ -1225,20 +1186,6 @@ NULL NULL 497 val_497 NULL NULL 498 val_498 NULL NULL 498 val_498 NULL NULL 498 val_498 -PREHOOK: query: insert overwrite table normal_join_results select * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@smb_bucket4_1 -PREHOOK: Input: default@smb_bucket4_2 -PREHOOK: Output: default@normal_join_results -POSTHOOK: query: insert overwrite table normal_join_results select * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smb_bucket4_1 -POSTHOOK: Input: default@smb_bucket4_2 -POSTHOOK: Output: default@normal_join_results -POSTHOOK: Lineage: normal_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: normal_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: normal_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: normal_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from normal_join_results PREHOOK: type: QUERY PREHOOK: Input: default@normal_join_results diff --git a/ql/src/test/results/clientpositive/binary_output_format.q.out b/ql/src/test/results/clientpositive/binary_output_format.q.out index ddb6adf4a0..35a3db2234 100644 --- a/ql/src/test/results/clientpositive/binary_output_format.q.out +++ b/ql/src/test/results/clientpositive/binary_output_format.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -117,6 +118,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string) + outputColumnNames: mydata + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(mydata, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -168,6 +185,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -211,6 +257,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: mydata + Column Types: string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/bucket1.q.out b/ql/src/test/results/clientpositive/bucket1.q.out index 1d204731cd..2895baee0a 100644 --- a/ql/src/test/results/clientpositive/bucket1.q.out +++ b/ql/src/test/results/clientpositive/bucket1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -129,6 +131,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -163,6 +192,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket1_1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucket1_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/bucket2.q.out b/ql/src/test/results/clientpositive/bucket2.q.out index 48ccafb746..08f399c342 100644 --- a/ql/src/test/results/clientpositive/bucket2.q.out +++ b/ql/src/test/results/clientpositive/bucket2.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -129,6 +130,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -163,6 +199,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket2_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/bucket3.q.out b/ql/src/test/results/clientpositive/bucket3.q.out index b1173e7b35..10ae348162 100644 --- a/ql/src/test/results/clientpositive/bucket3.q.out +++ b/ql/src/test/results/clientpositive/bucket3.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -127,6 +129,34 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -160,6 +190,90 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out index 70cd53c6e5..ec69cfac07 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out @@ -117,13 +117,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -197,7 +199,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -257,6 +259,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -393,6 +422,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -451,13 +557,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -531,7 +639,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -570,7 +678,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -591,6 +699,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -704,7 +839,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -727,6 +862,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out index d0c3a1aca0..17c45c7bf1 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out @@ -101,13 +101,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:b @@ -181,7 +183,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -241,6 +243,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -377,6 +406,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -435,13 +541,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:b @@ -515,7 +623,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -554,7 +662,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -575,6 +683,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -688,7 +823,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -711,6 +846,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out index eed4a5a970..cb1ef40469 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out @@ -101,13 +101,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -181,7 +183,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -241,6 +243,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -377,6 +406,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -435,13 +541,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -515,7 +623,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -554,7 +662,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -575,6 +683,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -688,7 +823,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -711,6 +846,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out index 5743944b4c..688fdfa125 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out @@ -189,7 +189,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -212,7 +212,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -239,7 +239,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -262,7 +262,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -289,7 +289,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -312,7 +312,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -545,7 +545,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -568,7 +568,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -595,7 +595,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -618,7 +618,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -645,7 +645,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -668,7 +668,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/bucketmapjoin13.q.out b/ql/src/test/results/clientpositive/bucketmapjoin13.q.out index 71b2924af2..406cca455a 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin13.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin13.q.out @@ -91,7 +91,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -192,7 +192,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name value column.name.delimiter , @@ -241,7 +241,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -371,7 +371,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -480,7 +480,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -615,7 +615,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -724,7 +724,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -859,7 +859,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -968,7 +968,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out index 4b989932ce..f97c64c6da 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out @@ -181,6 +181,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -272,6 +273,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -376,6 +393,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] /srcbucket_mapjoin_part/ds=2008-04-09 [b] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -418,6 +464,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -722,6 +776,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -792,7 +847,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -813,6 +868,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -917,6 +988,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -936,7 +1036,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -959,6 +1059,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -973,7 +1081,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1003,7 +1111,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1024,7 +1132,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1060,7 +1168,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1090,7 +1198,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1111,7 +1219,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out index 97cb1f10d3..8f95f97f4f 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out @@ -82,6 +82,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -212,6 +213,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -265,6 +282,35 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -307,6 +353,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out index 724df736ee..5ecf22f04a 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out @@ -91,6 +91,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -275,6 +276,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -328,6 +345,35 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -370,6 +416,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out index 165f0dc1e5..132ec23631 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -84,6 +85,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key, x.value from ( @@ -153,6 +161,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -189,6 +198,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * from ( @@ -258,6 +274,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -294,6 +311,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key+x.key, x.value from @@ -312,6 +336,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -343,6 +369,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -359,6 +401,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.k1, concat(x.v1, x.v1) from @@ -377,6 +455,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -413,3 +492,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out index c5e03be100..7a97b1fcf5 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out @@ -44,6 +44,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -80,6 +81,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.value, x.key from (SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x @@ -151,6 +159,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -182,6 +192,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), '1' (type: string) + outputColumnNames: value, key, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 16), compute_stats(key, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -198,6 +224,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key, x.value from (SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out index 1d794c3d28..252d6ab248 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out @@ -65,39 +65,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -106,7 +81,7 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -121,8 +96,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) value expressions: _col2 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -134,6 +107,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, key2, value + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -150,98 +137,37 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col7) (type: string) - outputColumnNames: _col1, _col2 - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - value expressions: _col2 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col7) (type: string) - outputColumnNames: _col1, _col2 - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - value expressions: _col2 (type: string) + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key, concat(a.value, b.value) @@ -339,43 +265,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -388,7 +285,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -400,8 +297,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: int) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) @@ -413,6 +308,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -429,104 +338,37 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.value diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out index 1e70105b9b..fb900709a5 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out @@ -65,43 +65,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -114,7 +85,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -129,8 +100,6 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -142,6 +111,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -158,110 +141,37 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -346,43 +256,14 @@ JOIN ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:test_table2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:test_table2 - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -395,7 +276,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -410,8 +291,6 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -423,6 +302,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -439,110 +332,37 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:test_table1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:test_table1 - TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out index f3d30068ad..29f8896293 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out @@ -68,6 +68,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -116,6 +117,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, b.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -197,6 +205,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -245,6 +254,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT b.key, a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b diff --git a/ql/src/test/results/clientpositive/case_sensitivity.q.out b/ql/src/test/results/clientpositive/case_sensitivity.q.out index b3969ccf90..01588262d0 100644 --- a/ql/src/test/results/clientpositive/case_sensitivity.q.out +++ b/ql/src/test/results/clientpositive/case_sensitivity.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 837 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/cast1.q.out b/ql/src/test/results/clientpositive/cast1.q.out index 9feb14f1bb..9967c5bb52 100644 --- a/ql/src/test/results/clientpositive/cast1.q.out +++ b/ql/src/test/results/clientpositive/cast1.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -44,6 +45,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: string), _col6 (type: int) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3348 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3372 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3372 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7 + Column Types: int, double, double, double, int, string, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index f260f034b6..d94cf30bbe 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -66,11 +66,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: state, locid, zip, year - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc compute statistics for columns state @@ -106,22 +106,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sq1:loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -129,13 +129,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: state, locid, $f2 - Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(locid) keys: state (type: string), $f2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -151,7 +151,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) Reduce Operator Tree: Group By Operator @@ -159,10 +159,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, $f2, $f2_0 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -767,30 +767,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, zip - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out index b29628038c..f8fd963e5b 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: src1 @@ -44,7 +46,7 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out index d4d70bc35e..05ca4bd5db 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -65,6 +67,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -79,6 +96,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) @@ -132,6 +182,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -175,6 +227,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -189,6 +256,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) diff --git a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out index c09764c156..5ce2699a42 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -88,6 +109,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out index a7c9b3fc41..2b80ed4328 100644 --- a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out +++ b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out @@ -36,7 +36,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}} numFiles 2 numRows 3 rawDataSize 24 @@ -56,10 +56,12 @@ Storage Desc Params: PREHOOK: query: analyze table calendar compute statistics for columns year PREHOOK: type: QUERY PREHOOK: Input: default@calendar +PREHOOK: Output: default@calendar #### A masked pattern was here #### POSTHOOK: query: analyze table calendar compute statistics for columns year POSTHOOK: type: QUERY POSTHOOK: Input: default@calendar +POSTHOOK: Output: default@calendar #### A masked pattern was here #### PREHOOK: query: desc formatted calendar PREHOOK: type: DESCTABLE @@ -79,7 +81,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"year\":\"true\"}} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}} numFiles 2 numRows 3 rawDataSize 24 @@ -168,7 +170,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\"}} #### A masked pattern was here #### numFiles 2 numRows 3 @@ -191,46 +193,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select max(month) from calendar POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: calendar - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: month (type: int) - outputColumnNames: month - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(month) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -322,29 +290,29 @@ STAGE PLANS: Map Operator Tree: TableScan alias: calendar - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: month (type: int) outputColumnNames: month - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(month) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/constprog_dp.q.out b/ql/src/test/results/clientpositive/constprog_dp.q.out index 8cf301d6f7..b0e4d95a1f 100644 --- a/ql/src/test/results/clientpositive/constprog_dp.q.out +++ b/ql/src/test/results/clientpositive/constprog_dp.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -68,6 +103,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out index 00bdb4caa1..b783d8acb9 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -106,10 +106,12 @@ ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 + Stage-2 depends on stages: Stage-1, Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-5 is a root stage + Stage-8 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 + Stage-6 is a root stage STAGE PLANS: Stage: Stage-1 @@ -198,6 +200,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -212,7 +229,36 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-5 + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, string + Table: default.dest_co1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -314,8 +360,12 @@ ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 + Stage-6 is a root stage STAGE PLANS: Stage: Stage-1 @@ -351,6 +401,120 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, string + Table: default.dest_co2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: m Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE @@ -383,85 +547,24 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 2956 Data size: 12099 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - - Stage: Stage-2 - Stats-Aggr Operator + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_co2 SELECT b.key, d.val @@ -510,21 +613,23 @@ JOIN ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-14 is a root stage - Stage-10 depends on stages: Stage-14 - Stage-9 depends on stages: Stage-10, Stage-11 , consists of Stage-12, Stage-13, Stage-2 - Stage-12 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-12 - Stage-0 depends on stages: Stage-2, Stage-7, Stage-8 - Stage-3 depends on stages: Stage-0 + Stage-15 is a root stage + Stage-11 depends on stages: Stage-15 + Stage-10 depends on stages: Stage-11, Stage-12 , consists of Stage-13, Stage-14, Stage-2 Stage-13 has a backup stage: Stage-2 Stage-8 depends on stages: Stage-13 + Stage-0 depends on stages: Stage-2, Stage-8, Stage-9 + Stage-3 depends on stages: Stage-0 + Stage-17 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2, Stage-8, Stage-9 + Stage-14 has a backup stage: Stage-2 + Stage-9 depends on stages: Stage-14 Stage-2 - Stage-15 is a root stage - Stage-11 depends on stages: Stage-15 + Stage-16 is a root stage + Stage-12 depends on stages: Stage-16 STAGE PLANS: - Stage: Stage-14 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_1:y @@ -547,7 +652,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -577,10 +682,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-9 + Stage: Stage-10 Conditional Operator - Stage: Stage-12 + Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -594,7 +699,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -618,6 +723,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -634,7 +754,36 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-13 + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, string + Table: default.dest_co3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -648,7 +797,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-8 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -672,6 +821,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -712,8 +876,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-15 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:$hdt$_1:m @@ -736,7 +915,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-11 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/cp_sel.q.out b/ql/src/test/results/clientpositive/cp_sel.q.out index af2efeb0cf..57a53e6474 100644 --- a/ql/src/test/results/clientpositive/cp_sel.q.out +++ b/ql/src/test/results/clientpositive/cp_sel.q.out @@ -82,6 +82,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -113,6 +115,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testpartbucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -130,6 +148,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.testpartbucket + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/deleteAnalyze.q.out index 1bae859e2c..d26502882c 100644 --- a/ql/src/test/results/clientpositive/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -48,7 +48,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}} numFiles 1 numRows 2 rawDataSize 634 @@ -74,23 +74,27 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testdeci2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -amount decimal(10,3) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +amount decimal(10,3) 12.123 123.123 0 2 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}} PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 +PREHOOK: Output: default@testdeci2 #### A masked pattern was here #### POSTHOOK: query: analyze table testdeci2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@testdeci2 +POSTHOOK: Output: default@testdeci2 #### A masked pattern was here #### PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 +PREHOOK: Output: default@testdeci2 #### A masked pattern was here #### POSTHOOK: query: analyze table testdeci2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@testdeci2 +POSTHOOK: Output: default@testdeci2 #### A masked pattern was here #### PREHOOK: query: explain select s.id, diff --git a/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out b/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out index d199574b29..b4efaf4511 100644 --- a/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out +++ b/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out @@ -61,7 +61,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -130,7 +130,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -199,7 +199,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -245,7 +245,7 @@ STAGE PLANS: partcol1 1 partcol2 __HIVE_DEFAULT_PARTITION__ properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol diff --git a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out index 1a0e46c31f..efbe2b67fe 100644 --- a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out +++ b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out @@ -220,9 +220,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -282,6 +286,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -296,11 +315,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) @@ -325,6 +380,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -336,9 +406,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -402,6 +494,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -461,6 +554,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -476,6 +582,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -491,6 +610,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -514,6 +659,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out index 76d0b7b02f..fa1b3d3909 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -71,3 +72,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.non_acid + diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out index 5d894abc79..18b2eb0ba0 100644 --- a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out +++ b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out @@ -595,7 +595,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -616,7 +616,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/explain_ddl.q.out b/ql/src/test/results/clientpositive/explain_ddl.q.out index e108e2207c..079b1f236d 100644 --- a/ql/src/test/results/clientpositive/explain_ddl.q.out +++ b/ql/src/test/results/clientpositive/explain_ddl.q.out @@ -496,6 +496,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -519,6 +520,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.m1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -542,6 +569,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.m1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out index b212da907b..323b1d5171 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out @@ -102,7 +102,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -147,7 +147,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -215,7 +215,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -260,7 +260,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index b5f4feede0..1e4a9996fe 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -119,7 +119,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -164,7 +164,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -209,7 +209,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -254,7 +254,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -296,12 +296,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_1d @@ -322,7 +322,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -367,7 +367,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -412,7 +412,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -457,7 +457,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -499,12 +499,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state @@ -545,7 +545,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -590,7 +590,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -635,7 +635,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -680,7 +680,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -748,7 +748,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -793,7 +793,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -838,7 +838,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -883,7 +883,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -925,12 +925,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: create table if not exists loc_orc_2d ( @@ -1026,7 +1026,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1072,7 +1072,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1118,7 +1118,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1164,7 +1164,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1256,7 +1256,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1302,7 +1302,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1348,7 +1348,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1394,7 +1394,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1486,7 +1486,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1528,12 +1528,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_2d @@ -1555,7 +1555,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1601,7 +1601,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1647,7 +1647,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1693,7 +1693,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1785,7 +1785,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1831,7 +1831,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1877,7 +1877,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1923,7 +1923,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -2015,7 +2015,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -2057,11 +2057,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d - Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out index 8f9b6363f4..884bfcdacc 100644 --- a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -89,7 +89,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -129,7 +129,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:f, $hdt$_1:m] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_1:f, $hdt$_2:m] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -226,7 +226,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -266,7 +266,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:g] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/groupby10.q.out b/ql/src/test/results/clientpositive/groupby10.q.out index 66832b02fc..2a00eb8ec3 100644 --- a/ql/src/test/results/clientpositive/groupby10.q.out +++ b/ql/src/test/results/clientpositive/groupby10.q.out @@ -45,10 +45,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -119,6 +125,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -133,11 +149,69 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: int), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: int) @@ -156,7 +230,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -185,6 +259,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -196,9 +280,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key @@ -290,10 +418,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -364,6 +498,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -378,11 +522,69 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: int), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: int) @@ -401,7 +603,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -430,6 +632,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -441,9 +653,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key @@ -534,8 +790,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -574,6 +834,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) keys: KEY._col0 (type: int) @@ -592,6 +862,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -606,6 +886,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -616,9 +932,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key diff --git a/ql/src/test/results/clientpositive/groupby11.q.out b/ql/src/test/results/clientpositive/groupby11.q.out index 1d0e86ab7d..da6e0cce5f 100644 --- a/ql/src/test/results/clientpositive/groupby11.q.out +++ b/ql/src/test/results/clientpositive/groupby11.q.out @@ -33,10 +33,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -107,6 +113,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -123,11 +139,78 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: substr(value, 5) (type: string), key (type: string) sort order: ++ Map-reduce partition columns: substr(value, 5) (type: string) @@ -146,7 +229,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -175,6 +258,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -188,9 +281,62 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 partition(ds='111') SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value diff --git a/ql/src/test/results/clientpositive/groupby12.q.out b/ql/src/test/results/clientpositive/groupby12.q.out index 921fc92b3c..0077fac6d4 100644 --- a/ql/src/test/results/clientpositive/groupby12.q.out +++ b/ql/src/test/results/clientpositive/groupby12.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +56,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,6 +80,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby1_limit.q.out b/ql/src/test/results/clientpositive/groupby1_limit.q.out index 78a49ebf6c..56a8c02ceb 100644 --- a/ql/src/test/results/clientpositive/groupby1_limit.q.out +++ b/ql/src/test/results/clientpositive/groupby1_limit.q.out @@ -17,6 +17,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -40,7 +42,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator @@ -66,7 +67,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Select Operator @@ -88,6 +88,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -102,6 +117,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key ORDER BY src.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby1_map.q.out b/ql/src/test/results/clientpositive/groupby1_map.q.out index cc985a5def..a69a48298f 100644 --- a/ql/src/test/results/clientpositive/groupby1_map.q.out +++ b/ql/src/test/results/clientpositive/groupby1_map.q.out @@ -16,6 +16,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +61,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +90,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out index cc985a5def..a69a48298f 100644 --- a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out +++ b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out @@ -16,6 +16,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +61,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +90,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby1_map_skew.q.out b/ql/src/test/results/clientpositive/groupby1_map_skew.q.out index 116744a29e..bdbd7fb057 100644 --- a/ql/src/test/results/clientpositive/groupby1_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby1_map_skew.q.out @@ -17,6 +17,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -84,6 +86,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -98,6 +115,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby1_noskew.q.out b/ql/src/test/results/clientpositive/groupby1_noskew.q.out index 98c0d3c28e..4dbd9fe8de 100644 --- a/ql/src/test/results/clientpositive/groupby1_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby1_noskew.q.out @@ -16,6 +16,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -53,6 +55,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -67,6 +79,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest_g1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby2_map.q.out b/ql/src/test/results/clientpositive/groupby2_map.q.out index 0dcd8109f1..b03af2db40 100644 --- a/ql/src/test/results/clientpositive/groupby2_map.q.out +++ b/ql/src/test/results/clientpositive/groupby2_map.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out index 64477dbfd2..3508f6e2a2 100644 --- a/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY @@ -120,6 +166,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -163,6 +211,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -177,6 +240,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby2_map_skew.q.out b/ql/src/test/results/clientpositive/groupby2_map_skew.q.out index 813ae5cb26..36b70bbbd7 100644 --- a/ql/src/test/results/clientpositive/groupby2_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby2_map_skew.q.out @@ -19,6 +19,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -86,6 +88,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -100,6 +117,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby2_noskew.q.out b/ql/src/test/results/clientpositive/groupby2_noskew.q.out index 5192db3966..3065e9459a 100644 --- a/ql/src/test/results/clientpositive/groupby2_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby2_noskew.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +56,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,6 +80,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out index 1c24213fba..8a52ddca65 100644 --- a/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +57,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -69,6 +81,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby3.q.out b/ql/src/test/results/clientpositive/groupby3.q.out index 2ebeae450b..648afc9e3a 100644 --- a/ql/src/test/results/clientpositive/groupby3.q.out +++ b/ql/src/test/results/clientpositive/groupby3.q.out @@ -37,6 +37,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -73,6 +75,7 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reduce Operator Tree: @@ -93,6 +96,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -107,6 +125,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_map.q.out b/ql/src/test/results/clientpositive/groupby3_map.q.out index 07c122e2d9..9fe74dee5a 100644 --- a/ql/src/test/results/clientpositive/groupby3_map.q.out +++ b/ql/src/test/results/clientpositive/groupby3_map.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +78,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +112,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out index a4501f7f7a..02d7bfdc7f 100644 --- a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -81,6 +82,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -95,6 +116,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out index e02bdeb904..621339d65b 100644 --- a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out @@ -37,6 +37,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -100,6 +101,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -114,6 +135,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/groupby3_noskew.q.out index 624fd2db54..8771ecd5c0 100644 --- a/ql/src/test/results/clientpositive/groupby3_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby3_noskew.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -70,6 +71,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -84,6 +101,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out index a1d403d6cf..aa282aaea8 100644 --- a/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5448 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5448 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -88,6 +105,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby4.q.out b/ql/src/test/results/clientpositive/groupby4.q.out index 3f77e47bd1..8ca3d8ef92 100644 --- a/ql/src/test/results/clientpositive/groupby4.q.out +++ b/ql/src/test/results/clientpositive/groupby4.q.out @@ -19,6 +19,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -72,6 +75,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -86,6 +99,57 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby4_map.q.out b/ql/src/test/results/clientpositive/groupby4_map.q.out index a8a7fbe052..a5067700fb 100644 --- a/ql/src/test/results/clientpositive/groupby4_map.q.out +++ b/ql/src/test/results/clientpositive/groupby4_map.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,6 +54,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +88,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby4_map_skew.q.out b/ql/src/test/results/clientpositive/groupby4_map_skew.q.out index 3f38895cde..7048766bba 100644 --- a/ql/src/test/results/clientpositive/groupby4_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby4_map_skew.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,6 +54,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +88,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby4_noskew.q.out b/ql/src/test/results/clientpositive/groupby4_noskew.q.out index c7db0d7016..c68dcbbb2e 100644 --- a/ql/src/test/results/clientpositive/groupby4_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby4_noskew.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -49,6 +51,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -63,6 +75,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby5.q.out b/ql/src/test/results/clientpositive/groupby5.q.out index 9bf01ee51b..a65baa74d1 100644 --- a/ql/src/test/results/clientpositive/groupby5.q.out +++ b/ql/src/test/results/clientpositive/groupby5.q.out @@ -23,6 +23,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -84,6 +87,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -98,6 +111,57 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) FROM src diff --git a/ql/src/test/results/clientpositive/groupby5_map.q.out b/ql/src/test/results/clientpositive/groupby5_map.q.out index 5fbd3d7dad..87b1ef8578 100644 --- a/ql/src/test/results/clientpositive/groupby5_map.q.out +++ b/ql/src/test/results/clientpositive/groupby5_map.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -55,6 +56,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +90,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby5_map_skew.q.out b/ql/src/test/results/clientpositive/groupby5_map_skew.q.out index 60b010b501..6c21d2abe1 100644 --- a/ql/src/test/results/clientpositive/groupby5_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby5_map_skew.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -55,6 +56,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +90,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby5_noskew.q.out b/ql/src/test/results/clientpositive/groupby5_noskew.q.out index 612a0f6112..d71f71c0b9 100644 --- a/ql/src/test/results/clientpositive/groupby5_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby5_noskew.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +61,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +85,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) FROM src diff --git a/ql/src/test/results/clientpositive/groupby6.q.out b/ql/src/test/results/clientpositive/groupby6.q.out index b79022405b..c874ba1852 100644 --- a/ql/src/test/results/clientpositive/groupby6.q.out +++ b/ql/src/test/results/clientpositive/groupby6.q.out @@ -19,6 +19,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -72,6 +75,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -86,6 +99,57 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby6_map.q.out b/ql/src/test/results/clientpositive/groupby6_map.q.out index 4ba3772298..3432642e92 100644 --- a/ql/src/test/results/clientpositive/groupby6_map.q.out +++ b/ql/src/test/results/clientpositive/groupby6_map.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +56,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,6 +85,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out index 5141c0d9b3..69e417923c 100644 --- a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out @@ -19,6 +19,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -91,6 +108,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby6_noskew.q.out b/ql/src/test/results/clientpositive/groupby6_noskew.q.out index fd796c7b74..2b4d0a195a 100644 --- a/ql/src/test/results/clientpositive/groupby6_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby6_noskew.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -49,6 +51,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -63,6 +75,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby7_map.q.out b/ql/src/test/results/clientpositive/groupby7_map.q.out index 0ef29cd29f..371f7a5c90 100644 --- a/ql/src/test/results/clientpositive/groupby7_map.q.out +++ b/ql/src/test/results/clientpositive/groupby7_map.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -90,6 +94,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,11 +123,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) @@ -133,6 +188,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -144,9 +214,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out index 7c3b033a62..b9857d1817 100644 --- a/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out @@ -28,8 +28,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -69,6 +73,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -87,6 +106,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -101,6 +135,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -111,9 +181,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_map_skew.q.out b/ql/src/test/results/clientpositive/groupby7_map_skew.q.out index 4bfa52ed89..51b426a8a7 100644 --- a/ql/src/test/results/clientpositive/groupby7_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby7_map_skew.q.out @@ -29,10 +29,14 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -116,6 +120,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -130,11 +149,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) @@ -154,7 +209,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -183,6 +238,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -194,9 +264,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_noskew.q.out b/ql/src/test/results/clientpositive/groupby7_noskew.q.out index 6178f58f7e..06913d9c99 100644 --- a/ql/src/test/results/clientpositive/groupby7_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby7_noskew.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -78,6 +82,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -92,11 +106,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) @@ -121,6 +171,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -132,9 +192,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out index f38c428781..c88f70bc40 100644 --- a/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out @@ -29,9 +29,13 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-2 @@ -108,6 +112,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -122,11 +136,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -152,6 +202,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -163,9 +223,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 diff --git a/ql/src/test/results/clientpositive/groupby8.q.out b/ql/src/test/results/clientpositive/groupby8.q.out index 1856a9252b..695c3c822d 100644 --- a/ql/src/test/results/clientpositive/groupby8.q.out +++ b/ql/src/test/results/clientpositive/groupby8.q.out @@ -29,10 +29,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -103,6 +109,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -117,11 +133,69 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: string), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: string) @@ -140,7 +214,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -169,6 +243,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -180,9 +264,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -850,10 +978,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -924,6 +1058,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -938,11 +1082,69 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: string), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: string) @@ -961,7 +1163,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -990,6 +1192,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -1001,9 +1213,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby8_map.q.out b/ql/src/test/results/clientpositive/groupby8_map.q.out index f683a8ba71..c887dd5dbb 100644 --- a/ql/src/test/results/clientpositive/groupby8_map.q.out +++ b/ql/src/test/results/clientpositive/groupby8_map.q.out @@ -28,8 +28,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -68,6 +72,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -86,6 +105,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -100,6 +134,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -110,9 +180,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby8_map_skew.q.out b/ql/src/test/results/clientpositive/groupby8_map_skew.q.out index 5e60d3e924..c0682243fa 100644 --- a/ql/src/test/results/clientpositive/groupby8_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby8_map_skew.q.out @@ -29,10 +29,14 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -115,6 +119,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -129,11 +148,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) @@ -152,7 +207,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -181,6 +236,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -192,9 +262,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby8_noskew.q.out b/ql/src/test/results/clientpositive/groupby8_noskew.q.out index f683a8ba71..1f986582e6 100644 --- a/ql/src/test/results/clientpositive/groupby8_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby8_noskew.q.out @@ -28,8 +28,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -68,6 +72,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -86,6 +100,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -100,6 +124,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -110,9 +170,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby9.q.out b/ql/src/test/results/clientpositive/groupby9.q.out index 15ea1857c0..f3d32d289f 100644 --- a/ql/src/test/results/clientpositive/groupby9.q.out +++ b/ql/src/test/results/clientpositive/groupby9.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -89,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -103,11 +122,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -131,6 +186,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -142,9 +212,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value @@ -812,9 +904,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -873,6 +969,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -887,11 +998,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -915,6 +1062,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -926,9 +1088,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key @@ -1596,9 +1780,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -1657,6 +1845,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1671,11 +1874,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -1699,6 +1938,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -1710,9 +1964,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value @@ -2380,9 +2656,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -2442,6 +2722,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -2456,11 +2751,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -2485,6 +2816,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -2496,9 +2842,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value @@ -3166,9 +3534,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -3227,6 +3599,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -3241,11 +3628,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -3269,6 +3692,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -3280,9 +3718,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key diff --git a/ql/src/test/results/clientpositive/groupby_cube1.q.out b/ql/src/test/results/clientpositive/groupby_cube1.q.out index fd70a2c205..1b14461cbd 100644 --- a/ql/src/test/results/clientpositive/groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/groupby_cube1.q.out @@ -552,10 +552,14 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -640,6 +644,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -654,11 +673,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) @@ -678,7 +733,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -708,6 +763,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -719,9 +789,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube diff --git a/ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out b/ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out index 1e13288c9a..590a23c1af 100644 --- a/ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out +++ b/ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out @@ -34,9 +34,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -92,6 +96,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -106,11 +125,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) @@ -130,6 +185,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -141,6 +211,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out index 0e5b394215..21122972df 100644 --- a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -198,6 +200,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -231,6 +260,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out index dbcef22473..b5a8f7fa00 100644 --- a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -198,6 +200,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -231,6 +260,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) diff --git a/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out b/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out index 5f02b04c38..7ed6fd8b28 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -89,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -103,11 +122,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double) @@ -131,6 +186,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -142,9 +212,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table dest1 select key, count(distinct value) group by key insert overwrite table dest2 select key+key, count(distinct value) group by key+key diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out index 256784d3d7..00fd1b46cf 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out @@ -54,10 +54,16 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 Stage-4 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-12 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 Stage-0 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -97,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -118,6 +139,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -139,6 +175,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -153,6 +204,49 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -163,9 +257,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -176,9 +292,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-6 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) @@ -276,16 +414,26 @@ STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 Stage-6 depends on stages: Stage-2 + Stage-18 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-19 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-20 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-21 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-22 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-7 depends on stages: Stage-5 Stage-0 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-0 Stage-9 depends on stages: Stage-5 - Stage-10 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-10 - Stage-11 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-9 - Stage-12 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-5 + Stage-10 depends on stages: Stage-1 + Stage-11 depends on stages: Stage-5 + Stage-12 depends on stages: Stage-5 + Stage-13 depends on stages: Stage-12 + Stage-3 depends on stages: Stage-13 + Stage-14 depends on stages: Stage-3 + Stage-15 depends on stages: Stage-13 + Stage-4 depends on stages: Stage-12 + Stage-16 depends on stages: Stage-4 + Stage-17 depends on stages: Stage-12 STAGE PLANS: Stage: Stage-5 @@ -335,6 +483,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -356,6 +519,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -377,6 +555,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -391,6 +584,63 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-18 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 + + Stage: Stage-20 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 + + Stage: Stage-21 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h2 + + Stage: Stage-22 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h3 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -401,9 +651,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -414,10 +686,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-8 + Stage: Stage-10 Stats-Aggr Operator - Stage: Stage-9 + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -467,8 +761,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-10 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -476,7 +785,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint) Reduce Operator Tree: Select Operator @@ -498,6 +806,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Move Operator @@ -509,9 +832,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 - Stage: Stage-11 + Stage: Stage-14 Stats-Aggr Operator + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 Move Operator tables: @@ -522,9 +867,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 - Stage: Stage-12 + Stage: Stage-16 Stats-Aggr Operator + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out index 012b2114b4..5151068325 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out @@ -28,8 +28,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -71,6 +75,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -92,6 +111,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -106,6 +140,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_g2 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, int + Table: default.dest_g3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -116,9 +186,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out index e41d9ef021..c9b6cec5a8 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out @@ -40,8 +40,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -83,6 +87,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -104,6 +123,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -118,6 +152,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -128,9 +198,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table e1 select key, count(*) @@ -206,8 +298,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -249,6 +345,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -270,6 +381,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -284,6 +410,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -294,9 +456,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table e1 select value, count(*) @@ -372,8 +556,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -415,6 +603,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -436,6 +639,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -450,6 +668,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -460,9 +714,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table e1 select key, count(*) @@ -538,8 +814,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -581,6 +861,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -602,6 +897,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -616,6 +926,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -626,9 +972,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table e1 select value, count(*) diff --git a/ql/src/test/results/clientpositive/groupby_position.q.out b/ql/src/test/results/clientpositive/groupby_position.q.out index 3ded69cf2a..109abc1458 100644 --- a/ql/src/test/results/clientpositive/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/groupby_position.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -87,6 +91,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -101,11 +120,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -129,6 +184,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -140,9 +210,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 @@ -218,9 +310,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -277,6 +373,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -291,11 +402,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -319,6 +466,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -330,9 +492,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1 diff --git a/ql/src/test/results/clientpositive/groupby_ppr.q.out b/ql/src/test/results/clientpositive/groupby_ppr.q.out index e645f5f598..9dfe2fc968 100644 --- a/ql/src/test/results/clientpositive/groupby_ppr.q.out +++ b/ql/src/test/results/clientpositive/groupby_ppr.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -191,6 +193,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2 + columns.types string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -224,6 +248,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2 + columns.types string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2 + columns.types string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) diff --git a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out index f300095bd7..c6a22b8fed 100644 --- a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -191,6 +193,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -224,6 +248,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) @@ -285,6 +386,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -431,7 +534,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -452,6 +555,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -462,7 +587,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -485,6 +610,87 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) diff --git a/ql/src/test/results/clientpositive/groupby_rollup1.q.out b/ql/src/test/results/clientpositive/groupby_rollup1.q.out index 5fd011e10e..b78d0623e1 100644 --- a/ql/src/test/results/clientpositive/groupby_rollup1.q.out +++ b/ql/src/test/results/clientpositive/groupby_rollup1.q.out @@ -397,10 +397,14 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -485,6 +489,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -499,11 +518,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) @@ -523,7 +578,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -553,6 +608,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -564,9 +634,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with rollup diff --git a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out index 9865af497c..78b5eebaef 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -106,6 +107,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +178,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -203,6 +249,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -428,6 +482,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -555,6 +611,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -588,6 +671,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY @@ -629,6 +789,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -666,7 +827,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -687,6 +848,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -742,6 +919,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -761,7 +967,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -784,6 +990,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -798,7 +1012,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -828,7 +1042,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -849,7 +1063,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -885,7 +1099,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -915,7 +1129,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -936,7 +1150,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1003,6 +1217,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1040,7 +1255,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1061,6 +1276,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1116,6 +1347,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1135,7 +1395,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1158,6 +1418,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1172,7 +1440,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1202,7 +1470,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1223,7 +1491,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1259,7 +1527,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1289,7 +1557,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1310,7 +1578,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1385,6 +1653,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1443,6 +1712,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1498,6 +1783,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1540,6 +1854,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1766,6 +2088,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1893,6 +2217,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1926,27 +2277,104 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + +PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@outputtbl4 +POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@outputtbl4 +POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] +POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM outputTbl4 +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM outputTbl4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl4 #### A masked pattern was here #### 1 1 11 1 2 1 12 1 @@ -1966,6 +2394,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -2072,7 +2502,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2093,6 +2523,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2103,7 +2560,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2126,6 +2583,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY @@ -2168,6 +2702,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -2284,7 +2820,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2305,6 +2841,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2315,7 +2878,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2338,6 +2901,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -2389,6 +3029,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -2428,7 +3069,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2449,6 +3090,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -2480,7 +3137,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2501,6 +3158,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2556,6 +3229,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1, null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -2575,7 +3277,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2598,6 +3300,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2612,7 +3322,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2642,7 +3352,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2663,7 +3373,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2699,7 +3409,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2729,7 +3439,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2750,7 +3460,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2839,6 +3549,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -2990,7 +3701,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3011,6 +3722,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan GatherStats: false Union @@ -3030,7 +3757,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3051,6 +3778,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3129,6 +3872,35 @@ STAGE PLANS: Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3148,7 +3920,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3171,6 +3943,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -3185,7 +3965,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3215,7 +3995,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3236,7 +4016,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3272,7 +4052,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3302,7 +4082,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3323,7 +4103,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3409,6 +4189,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -3548,7 +4330,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3569,6 +4351,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3579,7 +4388,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3602,6 +4411,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -3957,6 +4843,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -3995,7 +4883,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4018,7 +4906,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4064,7 +4952,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4085,6 +4973,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -4095,7 +5010,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4111,12 +5026,89 @@ STAGE PLANS: serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key @@ -4157,6 +5149,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -4194,7 +5187,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4215,6 +5208,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4224,7 +5233,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4247,7 +5256,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4270,6 +5279,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4289,7 +5327,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4312,6 +5350,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -4326,7 +5372,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4356,7 +5402,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4377,7 +5423,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4413,7 +5459,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4443,7 +5489,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4464,7 +5510,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4542,6 +5588,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -4600,6 +5647,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(key4, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4609,7 +5672,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4632,7 +5695,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4655,6 +5718,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4697,6 +5789,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -4926,6 +6026,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -4963,7 +6064,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4984,6 +6085,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4993,7 +6110,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5016,7 +6133,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5039,6 +6156,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5058,7 +6204,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5081,6 +6227,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5095,7 +6249,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5125,7 +6279,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5146,7 +6300,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5182,7 +6336,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5212,7 +6366,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5233,7 +6387,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5317,6 +6471,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -5354,7 +6509,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5375,6 +6530,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5384,7 +6555,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5407,7 +6578,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5430,6 +6601,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5449,7 +6649,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5472,6 +6672,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5486,7 +6694,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5516,7 +6724,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5537,7 +6745,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5573,7 +6781,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5603,7 +6811,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5624,7 +6832,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5720,8 +6928,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -5769,6 +6981,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5788,6 +7015,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -5802,6 +7044,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -5812,9 +7090,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val @@ -5875,8 +7175,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -5927,6 +7231,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5946,6 +7265,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -5960,6 +7294,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -5970,9 +7340,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val diff --git a/ql/src/test/results/clientpositive/groupby_sort_2.q.out b/ql/src/test/results/clientpositive/groupby_sort_2.q.out index de6bf14b0f..e49e9561d5 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_2.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_2.q.out @@ -46,6 +46,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -90,6 +92,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,6 +121,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: val, cnt + Column Types: string, int + Table: default.outputtbl1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT val, count(1) FROM T1 GROUP BY val PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_3.q.out b/ql/src/test/results/clientpositive/groupby_sort_3.q.out index da1db8c5c7..d295a0564f 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_3.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_3.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -81,6 +82,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -183,6 +217,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -216,6 +251,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -239,6 +300,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl2 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/groupby_sort_4.q.out b/ql/src/test/results/clientpositive/groupby_sort_4.q.out index ae2ae66ba4..bed68f6ef5 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_4.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_4.q.out @@ -46,6 +46,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -90,6 +92,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,6 +121,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY @@ -149,6 +195,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -192,6 +240,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -206,6 +269,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_5.q.out b/ql/src/test/results/clientpositive/groupby_sort_5.q.out index 40b97696c2..4b0e49152e 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_5.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_5.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -81,6 +82,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -211,6 +245,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -244,6 +279,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -267,6 +328,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -380,6 +448,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -424,6 +494,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -438,6 +523,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_6.q.out b/ql/src/test/results/clientpositive/groupby_sort_6.q.out index 93e3f26d66..3d80ab196d 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_6.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_6.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -102,6 +104,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -135,6 +164,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key PREHOOK: type: QUERY @@ -176,6 +282,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -231,7 +339,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -252,6 +360,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -262,7 +397,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -285,6 +420,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key PREHOOK: type: QUERY @@ -317,6 +529,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -419,7 +633,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -440,6 +654,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -450,7 +691,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -473,6 +714,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_7.q.out b/ql/src/test/results/clientpositive/groupby_sort_7.q.out index 21b0a37b5b..e7c9a956b8 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_7.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_7.q.out @@ -51,6 +51,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -84,6 +85,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -107,6 +134,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out index 16deb921ea..54ad57d4e4 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -106,6 +107,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +178,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -203,6 +249,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -429,6 +483,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -624,6 +680,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -657,6 +740,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY @@ -698,6 +858,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -735,7 +896,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -756,6 +917,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -811,6 +988,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -830,7 +1036,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -853,6 +1059,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -867,7 +1081,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -897,7 +1111,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -918,7 +1132,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -954,7 +1168,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -984,7 +1198,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1005,7 +1219,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1072,6 +1286,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1109,7 +1324,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1130,6 +1345,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1185,6 +1416,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1204,7 +1464,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1227,6 +1487,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1241,7 +1509,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1271,7 +1539,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1292,7 +1560,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1328,7 +1596,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1358,7 +1626,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1379,7 +1647,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1454,6 +1722,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1512,6 +1781,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1567,6 +1852,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1609,6 +1923,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1836,6 +2158,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2031,6 +2355,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2064,32 +2415,109 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 -#### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + +PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@outputtbl4 +POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@outputtbl4 +POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] +POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM outputTbl4 +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM outputTbl4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +1 1 11 1 +2 1 12 1 +3 1 13 1 +7 1 17 1 8 1 18 1 8 1 28 1 PREHOOK: query: EXPLAIN EXTENDED @@ -2105,6 +2533,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2279,7 +2709,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2300,6 +2730,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2310,7 +2767,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2333,6 +2790,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY @@ -2376,6 +2910,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2560,7 +3096,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2581,6 +3117,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2591,7 +3154,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2614,6 +3177,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -2665,6 +3305,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -2704,7 +3345,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2725,6 +3366,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -2756,7 +3413,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2777,6 +3434,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2832,6 +3505,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1, null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -2851,7 +3553,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2874,6 +3576,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2888,7 +3598,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2918,7 +3628,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2939,7 +3649,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2975,7 +3685,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3005,7 +3715,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3026,7 +3736,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3116,6 +3826,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -3335,7 +4046,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3356,6 +4067,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan GatherStats: false Union @@ -3375,7 +4102,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3396,6 +4123,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3474,6 +4217,35 @@ STAGE PLANS: Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3493,7 +4265,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3516,6 +4288,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -3530,7 +4310,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3560,7 +4340,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3581,7 +4361,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3617,7 +4397,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3647,7 +4427,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3668,7 +4448,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3754,6 +4534,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -3893,7 +4675,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3914,6 +4696,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3924,7 +4733,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3947,6 +4756,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -4372,6 +5258,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -4410,7 +5298,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4433,7 +5321,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4547,7 +5435,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4568,6 +5456,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -4578,7 +5493,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4601,6 +5516,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key PREHOOK: type: QUERY @@ -4640,6 +5632,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -4677,7 +5670,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4698,6 +5691,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4707,7 +5716,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4730,7 +5739,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4753,6 +5762,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4772,7 +5810,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4795,6 +5833,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -4809,7 +5855,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4839,7 +5885,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4860,7 +5906,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4896,7 +5942,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4926,7 +5972,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4947,7 +5993,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5025,6 +6071,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -5083,6 +6130,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(key4, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5092,7 +6155,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5115,7 +6178,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5138,6 +6201,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5180,6 +6272,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5409,6 +6509,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -5446,7 +6547,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5467,6 +6568,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5476,7 +6593,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5499,7 +6616,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5522,6 +6639,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5541,7 +6687,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5564,6 +6710,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5578,7 +6732,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5608,7 +6762,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5629,7 +6783,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5665,7 +6819,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5695,7 +6849,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5716,7 +6870,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5800,6 +6954,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -5837,7 +6992,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5858,6 +7013,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5867,7 +7038,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5890,7 +7061,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5913,6 +7084,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5932,7 +7132,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5955,6 +7155,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5969,7 +7177,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5999,7 +7207,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6020,7 +7228,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6056,7 +7264,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6086,7 +7294,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6107,7 +7315,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6204,8 +7412,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -6253,6 +7465,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -6296,6 +7523,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -6310,6 +7552,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -6320,9 +7598,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val @@ -6384,8 +7684,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -6436,6 +7740,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -6479,6 +7798,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -6493,6 +7827,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -6503,9 +7873,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val diff --git a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out index 5b94c0eef0..fcf6a083bd 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -81,6 +82,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out b/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out index c4b9dc4c62..be9f24a753 100644 --- a/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out +++ b/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -56,6 +57,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.implicit_cast_during_insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: c1, c2, p1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + keys: p1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -72,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.implicit_cast_during_insert + PREHOOK: query: insert overwrite table implicit_cast_during_insert partition (p1) select key, value, key key1 from (select * from src where key in (0,1)) q distribute by key1 sort by key1 diff --git a/ql/src/test/results/clientpositive/index_auto_update.q.out b/ql/src/test/results/clientpositive/index_auto_update.q.out index e7bc0690ad..6f90f5de72 100644 --- a/ql/src/test/results/clientpositive/index_auto_update.q.out +++ b/ql/src/test/results/clientpositive/index_auto_update.q.out @@ -46,6 +46,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 + Stage-13 depends on stages: Stage-3, Stage-4, Stage-5, Stage-7 Stage-4 depends on stages: Stage-2 Stage-5 depends on stages: Stage-2 Stage-7 depends on stages: Stage-0 @@ -72,6 +73,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-12 Conditional Operator @@ -137,6 +164,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: string, string + Table: default.temp + Stage: Stage-4 Stage: Stage-5 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out index 1aea388815..9ac873e177 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -91,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -142,7 +142,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -193,7 +193,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -244,7 +244,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 4970 @@ -295,7 +295,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 2654 rawDataSize 28466 @@ -346,7 +346,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 2654 rawDataSize 28466 @@ -397,7 +397,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -448,7 +448,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -499,7 +499,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -550,7 +550,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -601,7 +601,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -652,7 +652,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -703,7 +703,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -754,7 +754,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -805,7 +805,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -856,7 +856,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 6 rawDataSize 18 @@ -907,7 +907,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 6 rawDataSize 18 @@ -958,7 +958,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 2964 @@ -1009,7 +1009,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 5 rawDataSize 19 @@ -1060,7 +1060,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1111,7 +1111,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1162,7 +1162,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1213,7 +1213,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1264,7 +1264,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 3582 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out index 52ebe5aa8d..2e996b9755 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -62,8 +62,6 @@ SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key POSTHOOK: type: QUERY @@ -93,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -105,8 +103,8 @@ SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No -Num Buckets: 1 -Bucket Columns: [key] -Sort Columns: [Order(col:key, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out index d62d0b8d2f..d238545b44 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out @@ -58,7 +58,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 4812 @@ -98,7 +98,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 4812 @@ -161,7 +161,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1173 @@ -201,7 +201,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1173 @@ -266,7 +266,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 155 rawDataSize 586 @@ -306,7 +306,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 154 rawDataSize 591 @@ -428,9 +428,11 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -474,6 +476,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.test_table + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -500,6 +518,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -520,6 +545,35 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key, value, IF (key % 100 == 0, '11', '12') FROM (SELECT key, COUNT(*) AS value FROM srcpart @@ -568,7 +622,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 4 rawDataSize 14 @@ -608,7 +662,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 305 rawDataSize 1163 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out index 3e2966492c..92ae9132d9 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -68,6 +70,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(agg, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -84,6 +102,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: SELECT key, value, count(1) FROM src GROUP BY ROLLUP (key, value) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -747,7 +801,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 619 rawDataSize 6309 @@ -1429,7 +1483,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 619 rawDataSize 7547 @@ -1456,6 +1510,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1500,6 +1556,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(agg, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1516,6 +1588,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE PREHOOK: type: QUERY @@ -1552,7 +1660,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 928 rawDataSize 9954 @@ -1607,7 +1715,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 928 rawDataSize 11810 @@ -1634,6 +1742,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1678,6 +1788,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(agg, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1694,6 +1820,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) PREHOOK: type: QUERY @@ -1730,7 +1892,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 6054 @@ -1785,7 +1947,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 7290 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out index 98a2f5f3c0..c8d1492840 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out @@ -46,7 +46,7 @@ Database: default Table: list_bucketing_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 309 rawDataSize 1482 @@ -116,7 +116,7 @@ Database: default Table: list_bucketing_table2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out index 32edd73030..fab1793670 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out @@ -58,6 +58,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -91,6 +92,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -116,6 +151,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -180,7 +222,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -218,6 +260,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -281,6 +325,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -297,6 +357,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT a.key, a.value FROM ( SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key @@ -372,6 +468,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -403,6 +500,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -428,6 +553,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -494,7 +626,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -523,6 +655,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -567,6 +701,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -583,6 +731,38 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key GROUP BY b.value @@ -621,7 +801,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2728 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out index bf77d4ce5a..bf4daf29eb 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -91,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1028 rawDataSize 10968 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out index 59b20fe4da..114fcfeccb 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out @@ -46,7 +46,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -85,7 +85,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -142,7 +142,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -181,7 +181,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2718 @@ -238,7 +238,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -277,7 +277,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -334,7 +334,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -373,7 +373,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2690 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out index 0c61fe0212..29e29ae404 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -56,6 +58,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-04-08' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +91,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), _col1 (type: string) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-04-08' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key2, value, cast(hr as int) FROM (SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr @@ -123,7 +177,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 670 rawDataSize 5888 @@ -163,7 +217,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 330 rawDataSize 2924 diff --git a/ql/src/test/results/clientpositive/innerjoin.q.out b/ql/src/test/results/clientpositive/innerjoin.q.out index 99b3d856eb..f201ec4bea 100644 --- a/ql/src/test/results/clientpositive/innerjoin.q.out +++ b/ql/src/test/results/clientpositive/innerjoin.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +77,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +106,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 INNER JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input11.q.out b/ql/src/test/results/clientpositive/input11.q.out index bb22ee86df..9543c65e32 100644 --- a/ql/src/test/results/clientpositive/input11.q.out +++ b/ql/src/test/results/clientpositive/input11.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input11_limit.q.out b/ql/src/test/results/clientpositive/input11_limit.q.out index 597554e02f..4a1b77a2b4 100644 --- a/ql/src/test/results/clientpositive/input11_limit.q.out +++ b/ql/src/test/results/clientpositive/input11_limit.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -39,7 +41,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -61,6 +62,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +91,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input12.q.out b/ql/src/test/results/clientpositive/input12.q.out index 3bb765cf78..470bbf2ef9 100644 --- a/ql/src/test/results/clientpositive/input12.q.out +++ b/ql/src/test/results/clientpositive/input12.q.out @@ -40,23 +40,18 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-14 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 + Stage-15 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 + Stage-16 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 + Stage-1 depends on stages: Stage-3 Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 - Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19 - Stage-18 - Stage-2 depends on stages: Stage-18, Stage-17, Stage-20 - Stage-16 depends on stages: Stage-2 - Stage-17 - Stage-19 - Stage-20 depends on stages: Stage-19 + Stage-11 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 + Stage-12 depends on stages: Stage-2 + Stage-13 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -80,6 +75,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -95,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (key >= 200) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -110,6 +133,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -133,6 +185,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-15 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -163,15 +236,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -189,40 +253,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-21 - Conditional Operator - - Stage: Stage-18 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -237,38 +284,37 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 - Stage: Stage-16 + Stage: Stage-12 Stats-Aggr Operator - Stage: Stage-17 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-19 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-20 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 diff --git a/ql/src/test/results/clientpositive/input13.q.out b/ql/src/test/results/clientpositive/input13.q.out index 91ff23429b..1bc031daf1 100644 --- a/ql/src/test/results/clientpositive/input13.q.out +++ b/ql/src/test/results/clientpositive/input13.q.out @@ -42,29 +42,19 @@ STAGE DEPENDENCIES: Stage-7 Stage-0 depends on stages: Stage-7, Stage-6, Stage-9 Stage-5 depends on stages: Stage-0 + Stage-15 depends on stages: Stage-5, Stage-11, Stage-12, Stage-13, Stage-14, Stage-3 + Stage-16 depends on stages: Stage-5, Stage-11, Stage-12, Stage-13, Stage-14, Stage-3 + Stage-17 depends on stages: Stage-5, Stage-11, Stage-12, Stage-13, Stage-14, Stage-3 Stage-6 Stage-8 Stage-9 depends on stages: Stage-8 - Stage-16 depends on stages: Stage-4 , consists of Stage-13, Stage-12, Stage-14 - Stage-13 - Stage-1 depends on stages: Stage-13, Stage-12, Stage-15 + Stage-1 depends on stages: Stage-4 Stage-11 depends on stages: Stage-1 - Stage-12 - Stage-14 - Stage-15 depends on stages: Stage-14 - Stage-22 depends on stages: Stage-4 , consists of Stage-19, Stage-18, Stage-20 - Stage-19 - Stage-2 depends on stages: Stage-19, Stage-18, Stage-21 - Stage-17 depends on stages: Stage-2 - Stage-18 - Stage-20 - Stage-21 depends on stages: Stage-20 - Stage-27 depends on stages: Stage-4 , consists of Stage-24, Stage-23, Stage-25 - Stage-24 - Stage-3 depends on stages: Stage-24, Stage-23, Stage-26 - Stage-23 - Stage-25 - Stage-26 depends on stages: Stage-25 + Stage-12 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-4 + Stage-13 depends on stages: Stage-2 + Stage-14 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 @@ -88,6 +78,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -103,6 +106,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((key >= 200) and (key < 300)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -118,6 +136,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (key >= 300) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -132,6 +166,19 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-10 Conditional Operator @@ -155,6 +202,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-15 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 + Stage: Stage-6 Map Reduce Map Operator Tree: @@ -185,15 +253,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-16 - Conditional Operator - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -211,40 +270,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-15 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-22 - Conditional Operator - - Stage: Stage-19 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -259,47 +301,37 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 - Stage: Stage-17 + Stage: Stage-13 Stats-Aggr Operator - Stage: Stage-18 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-20 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-21 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-27 - Conditional Operator - - Stage: Stage-24 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### Stage: Stage-3 Move Operator @@ -307,34 +339,6 @@ STAGE PLANS: hdfs directory: true destination: target/warehouse/dest4.out - Stage: Stage-23 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-25 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-26 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 diff --git a/ql/src/test/results/clientpositive/input14.q.out b/ql/src/test/results/clientpositive/input14.q.out index af04a9896d..e5120fc0b1 100644 --- a/ql/src/test/results/clientpositive/input14.q.out +++ b/ql/src/test/results/clientpositive/input14.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,6 +71,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -83,6 +100,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value) diff --git a/ql/src/test/results/clientpositive/input14_limit.q.out b/ql/src/test/results/clientpositive/input14_limit.q.out index 9870ad53b4..828a09465d 100644 --- a/ql/src/test/results/clientpositive/input14_limit.q.out +++ b/ql/src/test/results/clientpositive/input14_limit.q.out @@ -29,6 +29,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,7 +55,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -79,7 +80,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -104,6 +104,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -118,6 +133,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value) diff --git a/ql/src/test/results/clientpositive/input17.q.out b/ql/src/test/results/clientpositive/input17.q.out index 057a92d163..8ecf7f97cb 100644 --- a/ql/src/test/results/clientpositive/input17.q.out +++ b/ql/src/test/results/clientpositive/input17.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +68,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -80,6 +97,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src_thrift SELECT TRANSFORM(src_thrift.aint + src_thrift.lint[0], src_thrift.lintstring[0]) diff --git a/ql/src/test/results/clientpositive/input18.q.out b/ql/src/test/results/clientpositive/input18.q.out index b341510502..be98cdae49 100644 --- a/ql/src/test/results/clientpositive/input18.q.out +++ b/ql/src/test/results/clientpositive/input18.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,6 +71,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -83,6 +100,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value, 1+2, 3+4) diff --git a/ql/src/test/results/clientpositive/input1_limit.q.out b/ql/src/test/results/clientpositive/input1_limit.q.out index 0ca1552ef2..4e69b72efb 100644 --- a/ql/src/test/results/clientpositive/input1_limit.q.out +++ b/ql/src/test/results/clientpositive/input1_limit.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -52,7 +56,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Filter Operator predicate: (key < 100) (type: boolean) @@ -90,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,14 +122,49 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -133,6 +186,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -144,9 +212,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10 INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key < 100 LIMIT 5 diff --git a/ql/src/test/results/clientpositive/input20.q.out b/ql/src/test/results/clientpositive/input20.q.out index cf0ee1d414..076deaf105 100644 --- a/ql/src/test/results/clientpositive/input20.q.out +++ b/ql/src/test/results/clientpositive/input20.q.out @@ -36,6 +36,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -85,6 +87,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -99,6 +116,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src MAP src.key, src.key diff --git a/ql/src/test/results/clientpositive/input30.q.out b/ql/src/test/results/clientpositive/input30.q.out index 130f22dc86..a9fb038543 100644 --- a/ql/src/test/results/clientpositive/input30.q.out +++ b/ql/src/test/results/clientpositive/input30.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -64,6 +65,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tst_dest30 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -78,6 +99,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: a + Column Types: int + Table: default.tst_dest30 + PREHOOK: query: insert overwrite table dest30 select count(1) from src PREHOOK: type: QUERY @@ -110,6 +138,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -147,6 +176,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest30 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -161,6 +210,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: a + Column Types: int + Table: default.dest30 + PREHOOK: query: insert overwrite table dest30 select count(1) from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input31.q.out b/ql/src/test/results/clientpositive/input31.q.out index 264ebe57a9..0fe569be1f 100644 --- a/ql/src/test/results/clientpositive/input31.q.out +++ b/ql/src/test/results/clientpositive/input31.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tst_dest31 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -80,6 +101,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: a + Column Types: int + Table: default.tst_dest31 + PREHOOK: query: insert overwrite table dest31 select count(1) from srcbucket PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input32.q.out b/ql/src/test/results/clientpositive/input32.q.out index c8fdfd4b75..97a2f22967 100644 --- a/ql/src/test/results/clientpositive/input32.q.out +++ b/ql/src/test/results/clientpositive/input32.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -63,6 +64,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tst_dest32 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -77,6 +98,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: a + Column Types: int + Table: default.tst_dest32 + PREHOOK: query: insert overwrite table dest32 select count(1) from srcbucket PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input33.q.out b/ql/src/test/results/clientpositive/input33.q.out index b35e2d0425..9d6fcc035b 100644 --- a/ql/src/test/results/clientpositive/input33.q.out +++ b/ql/src/test/results/clientpositive/input33.q.out @@ -36,6 +36,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -85,6 +87,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -99,6 +116,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src MAP src.key, src.key diff --git a/ql/src/test/results/clientpositive/input34.q.out b/ql/src/test/results/clientpositive/input34.q.out index 72f66c3f17..201da7e15c 100644 --- a/ql/src/test/results/clientpositive/input34.q.out +++ b/ql/src/test/results/clientpositive/input34.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -64,6 +65,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +114,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input35.q.out b/ql/src/test/results/clientpositive/input35.q.out index 8b869918e1..7509d14062 100644 --- a/ql/src/test/results/clientpositive/input35.q.out +++ b/ql/src/test/results/clientpositive/input35.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -64,6 +65,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +114,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input36.q.out b/ql/src/test/results/clientpositive/input36.q.out index 76921ad6b8..d018fbe062 100644 --- a/ql/src/test/results/clientpositive/input36.q.out +++ b/ql/src/test/results/clientpositive/input36.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -64,6 +65,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +114,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input38.q.out b/ql/src/test/results/clientpositive/input38.q.out index 0c4e81d0ef..36efd441c7 100644 --- a/ql/src/test/results/clientpositive/input38.q.out +++ b/ql/src/test/results/clientpositive/input38.q.out @@ -28,6 +28,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -58,6 +59,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -81,6 +108,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input3_limit.q.out b/ql/src/test/results/clientpositive/input3_limit.q.out index c7bc4accd9..19e5b7971d 100644 --- a/ql/src/test/results/clientpositive/input3_limit.q.out +++ b/ql/src/test/results/clientpositive/input3_limit.q.out @@ -41,6 +41,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -58,7 +60,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -81,7 +82,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -99,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -113,6 +128,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20 PREHOOK: type: QUERY PREHOOK: Input: default@t1 diff --git a/ql/src/test/results/clientpositive/input5.q.out b/ql/src/test/results/clientpositive/input5.q.out index a39952878d..c137f8b6dc 100644 --- a/ql/src/test/results/clientpositive/input5.q.out +++ b/ql/src/test/results/clientpositive/input5.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +68,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -80,6 +97,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src_thrift SELECT TRANSFORM(src_thrift.lint, src_thrift.lintstring) diff --git a/ql/src/test/results/clientpositive/input6.q.out b/ql/src/test/results/clientpositive/input6.q.out index 3d1a815cf6..cf007cfb2f 100644 --- a/ql/src/test/results/clientpositive/input6.q.out +++ b/ql/src/test/results/clientpositive/input6.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input7.q.out b/ql/src/test/results/clientpositive/input7.q.out index 0545b1f774..380382f193 100644 --- a/ql/src/test/results/clientpositive/input7.q.out +++ b/ql/src/test/results/clientpositive/input7.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: int) + outputColumnNames: c1, c2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: double, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input8.q.out b/ql/src/test/results/clientpositive/input8.q.out index d76fc2bc02..141ca38a00 100644 --- a/ql/src/test/results/clientpositive/input8.q.out +++ b/ql/src/test/results/clientpositive/input8.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input9.q.out b/ql/src/test/results/clientpositive/input9.q.out index af752e0af9..a4f1594181 100644 --- a/ql/src/test/results/clientpositive/input9.q.out +++ b/ql/src/test/results/clientpositive/input9.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: value, key + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 16), compute_stats(key, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_part1.q.out b/ql/src/test/results/clientpositive/input_part1.q.out index a68544671f..df00d0365b 100644 --- a/ql/src/test/results/clientpositive/input_part1.q.out +++ b/ql/src/test/results/clientpositive/input_part1.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -72,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -125,6 +142,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -167,6 +213,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_part10.q.out b/ql/src/test/results/clientpositive/input_part10.q.out index 4b552badc6..53f211e10d 100644 --- a/ql/src/test/results/clientpositive/input_part10.q.out +++ b/ql/src/test/results/clientpositive/input_part10.q.out @@ -30,6 +30,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +50,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int), _col1 (type: int) Reduce Operator Tree: Select Operator @@ -70,6 +71,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_special + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008 04 08' (type: string), '10:11:12=455' (type: string) + outputColumnNames: a, b, ds, ts + Statistics: Num rows: 1 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + keys: ds (type: string), ts (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1174 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -87,6 +104,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: string, string + Table: default.part_special + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1174 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455') SELECT 1, 2 FROM src LIMIT 1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input_part2.q.out b/ql/src/test/results/clientpositive/input_part2.q.out index d5524feabc..8f2e341d03 100644 --- a/ql/src/test/results/clientpositive/input_part2.q.out +++ b/ql/src/test/results/clientpositive/input_part2.q.out @@ -33,13 +33,9 @@ STAGE DEPENDENCIES: Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -89,6 +85,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-08' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: ((key < 100) and (ds = '2008-04-09')) (type: boolean) @@ -129,6 +141,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-09' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -231,6 +270,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -270,8 +338,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Column Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -453,15 +527,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -491,188 +556,83 @@ STAGE PLANS: name: default.dest2 Stage: Stage-9 - Stats-Aggr Operator -#### A masked pattern was here #### + Column Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest2 + Is Table Level Stats: true Stage: Stage-10 Map Reduce Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - name: default.dest2 + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - name: default.dest2 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + MultiFileSpray: false PREHOOK: query: FROM srcpart INSERT OVERWRITE TABLE dest1 SELECT srcpart.key, srcpart.value, srcpart.hr, srcpart.ds WHERE srcpart.key < 100 and srcpart.ds = '2008-04-08' and srcpart.hr = '12' diff --git a/ql/src/test/results/clientpositive/input_part5.q.out b/ql/src/test/results/clientpositive/input_part5.q.out index c6ae2fd58d..42e1ea513b 100644 --- a/ql/src/test/results/clientpositive/input_part5.q.out +++ b/ql/src/test/results/clientpositive/input_part5.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: string, string, string, string + Table: default.tmptable + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_testsequencefile.q.out b/ql/src/test/results/clientpositive/input_testsequencefile.q.out index 60aaf83df5..b3f3defcb6 100644 --- a/ql/src/test/results/clientpositive/input_testsequencefile.q.out +++ b/ql/src/test/results/clientpositive/input_testsequencefile.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4_sequencefile + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest4_sequencefile + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_testxpath.q.out b/ql/src/test/results/clientpositive/input_testxpath.q.out index e07628aaea..b81b56c9fc 100644 --- a/ql/src/test/results/clientpositive/input_testxpath.q.out +++ b/ql/src/test/results/clientpositive/input_testxpath.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, mapvalue + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(mapvalue, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, mapvalue + Column Types: int, string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_testxpath2.q.out b/ql/src/test/results/clientpositive/input_testxpath2.q.out index a0baccf925..373fcb0a5e 100644 --- a/ql/src/test/results/clientpositive/input_testxpath2.q.out +++ b/ql/src/test/results/clientpositive/input_testxpath2.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: lint_size, lintstring_size, mstringstring_size + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(lint_size, 16), compute_stats(lintstring_size, 16), compute_stats(mstringstring_size, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: lint_size, lintstring_size, mstringstring_size + Column Types: int, int, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert1.q.out b/ql/src/test/results/clientpositive/insert1.q.out index 39525787c9..685bfe26e6 100644 --- a/ql/src/test/results/clientpositive/insert1.q.out +++ b/ql/src/test/results/clientpositive/insert1.q.out @@ -34,6 +34,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -60,6 +61,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -83,6 +110,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -123,6 +157,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -149,6 +184,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -172,6 +233,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -226,6 +294,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -252,6 +321,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -275,6 +370,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -315,6 +417,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -341,6 +444,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -364,6 +493,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -410,16 +546,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -443,6 +577,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -458,6 +605,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -481,6 +656,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -511,15 +700,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -537,31 +717,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: CREATE DATABASE db2 PREHOOK: type: CREATEDATABASE diff --git a/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out index 49c1269cc1..5231349d39 100644 --- a/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out @@ -42,6 +42,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -58,7 +60,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -75,6 +76,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string), '11' (type: string) + outputColumnNames: one, two, ds, hr + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 16), compute_stats(two, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -92,6 +109,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: default.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE destinTable PARTITION (ds='2011-11-11', hr='11') if not exists SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='11' order by one desc, two desc limit 5 PREHOOK: type: QUERY @@ -175,6 +228,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -191,7 +246,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -208,6 +262,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: one, two + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 16), compute_stats(two, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -222,6 +291,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: default.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE destinTable SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='11' order by one desc, two desc limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@sourcetable diff --git a/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out index b5f28d289a..15e6d76f12 100644 --- a/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out @@ -53,6 +53,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,7 +71,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -86,6 +87,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: db2.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string) + outputColumnNames: one, two, ds + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 16), compute_stats(two, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -102,6 +119,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: db2.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE db2.destinTable PARTITION (ds='2011-11-11') SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, two desc limit 5 PREHOOK: type: QUERY @@ -141,6 +194,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -157,7 +212,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -174,6 +228,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: db2.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string) + outputColumnNames: one, two, ds + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 16), compute_stats(two, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -190,6 +260,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: db2.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE db2.destinTable PARTITION (ds='2011-11-11') SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, two desc limit 5 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/insert_into1.q.out b/ql/src/test/results/clientpositive/insert_into1.q.out index da863a7185..46c0512a5b 100644 --- a/ql/src/test/results/clientpositive/insert_into1.q.out +++ b/ql/src/test/results/clientpositive/insert_into1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -34,7 +36,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -56,6 +57,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -70,6 +86,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -126,6 +171,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -142,7 +189,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -164,6 +210,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -178,6 +239,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -234,6 +324,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -250,7 +342,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -272,6 +363,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -286,6 +392,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -344,6 +479,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -368,6 +504,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 1 (type: int), 'a' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -391,6 +553,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -441,6 +610,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -465,6 +635,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 2 (type: int), 'b' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -488,6 +684,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert_into2.q.out b/ql/src/test/results/clientpositive/insert_into2.q.out index 46fab7b416..0469bfe024 100644 --- a/ql/src/test/results/clientpositive/insert_into2.q.out +++ b/ql/src/test/results/clientpositive/insert_into2.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -38,7 +40,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -60,6 +61,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -76,6 +93,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src order by key limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -171,6 +224,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -187,7 +242,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -209,6 +263,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -225,6 +295,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 100 PREHOOK: type: QUERY @@ -289,6 +395,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -305,7 +413,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -327,6 +434,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -343,6 +466,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 50 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/insert_into3.q.out b/ql/src/test/results/clientpositive/insert_into3.q.out index ae7523b217..e1e943de93 100644 --- a/ql/src/test/results/clientpositive/insert_into3.q.out +++ b/ql/src/test/results/clientpositive/insert_into3.q.out @@ -32,9 +32,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -51,7 +55,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -82,6 +85,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -96,15 +114,50 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -125,6 +178,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -136,9 +204,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT INTO TABLE insert_into3a SELECT * ORDER BY key, value LIMIT 50 INSERT INTO TABLE insert_into3b SELECT * ORDER BY key, value LIMIT 100 PREHOOK: type: QUERY @@ -191,9 +281,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -212,7 +306,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Select Operator expressions: key (type: string), value (type: string) @@ -247,6 +340,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -261,14 +369,49 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -290,6 +433,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -301,9 +459,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE insert_into3a SELECT * LIMIT 10 INSERT INTO TABLE insert_into3b SELECT * LIMIT 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/insert_into4.q.out b/ql/src/test/results/clientpositive/insert_into4.q.out index bb4e5571a6..c24ecdd5b6 100644 --- a/ql/src/test/results/clientpositive/insert_into4.q.out +++ b/ql/src/test/results/clientpositive/insert_into4.q.out @@ -30,6 +30,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +50,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -70,6 +71,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -84,6 +100,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -115,6 +160,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -133,7 +180,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -155,6 +201,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -169,6 +230,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -202,6 +292,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -225,6 +316,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -248,6 +365,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4b + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert_into5.q.out b/ql/src/test/results/clientpositive/insert_into5.q.out index 7b471f4433..1eaebc731b 100644 --- a/ql/src/test/results/clientpositive/insert_into5.q.out +++ b/ql/src/test/results/clientpositive/insert_into5.q.out @@ -30,6 +30,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +50,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -66,6 +67,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -80,6 +96,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into5a SELECT 1, 'one' FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -113,6 +158,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -136,6 +182,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -159,6 +231,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5a + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -224,6 +303,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -247,6 +327,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -272,6 +386,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5b + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -339,6 +460,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -362,6 +484,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -387,6 +543,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5b + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert_into6.q.out b/ql/src/test/results/clientpositive/insert_into6.q.out index d93a167a74..ba062f1f05 100644 --- a/ql/src/test/results/clientpositive/insert_into6.q.out +++ b/ql/src/test/results/clientpositive/insert_into6.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -50,7 +52,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -72,6 +73,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into6a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -88,6 +105,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into6a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into6a PARTITION (ds='1') SELECT * FROM src LIMIT 150 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -137,6 +190,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -160,6 +214,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into6b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -185,6 +273,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into6b + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out b/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out index 0d8e7790e1..7ce6861f6c 100644 --- a/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out +++ b/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out @@ -305,6 +305,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} numFiles 1 numRows 0 rawDataSize 0 @@ -427,6 +428,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} numFiles 2 numRows 0 rawDataSize 0 @@ -677,7 +679,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -869,6 +871,7 @@ Database: default Table: sp #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 0 rawDataSize 0 @@ -920,7 +923,7 @@ Database: default Table: sp #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 502 rawDataSize 5318 diff --git a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out index c2732c8896..db70465f30 100644 --- a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out +++ b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out @@ -100,6 +100,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -130,6 +132,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: change, num + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(change, 16), compute_stats(num, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -144,6 +161,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: change, num + Column Types: string, string + Table: default.temp1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: CREATE TABLE temp2 ( create_ts STRING , @@ -178,6 +224,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -209,6 +257,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: create_ts, change, num + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(create_ts, 16), compute_stats(change, 16), compute_stats(num, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -223,6 +286,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: create_ts, change, num + Column Types: string, string, string + Table: default.temp2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data) PREHOOK: type: QUERY PREHOOK: Input: default@bucketoutput1 diff --git a/ql/src/test/results/clientpositive/join14.q.out b/ql/src/test/results/clientpositive/join14.q.out index 66e42f1992..03c3dbacfd 100644 --- a/ql/src/test/results/clientpositive/join14.q.out +++ b/ql/src/test/results/clientpositive/join14.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +77,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +106,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join17.q.out b/ql/src/test/results/clientpositive/join17.q.out index f9edc792eb..13d80c024b 100644 --- a/ql/src/test/results/clientpositive/join17.q.out +++ b/ql/src/test/results/clientpositive/join17.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -162,6 +164,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -195,6 +224,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join25.q.out b/ql/src/test/results/clientpositive/join25.q.out index 5ad95c507b..99b2bb12b5 100644 --- a/ql/src/test/results/clientpositive/join25.q.out +++ b/ql/src/test/results/clientpositive/join25.q.out @@ -17,13 +17,15 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join26.q.out b/ql/src/test/results/clientpositive/join26.q.out index b41fd8efe1..ac25b91d39 100644 --- a/ql/src/test/results/clientpositive/join26.q.out +++ b/ql/src/test/results/clientpositive/join26.q.out @@ -19,13 +19,15 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.key = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:y @@ -74,7 +76,7 @@ STAGE PLANS: 2 _col0 (type: string) Position of Big Table: 2 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -136,6 +138,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -316,6 +345,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join27.q.out b/ql/src/test/results/clientpositive/join27.q.out index 8b43f3f32a..0457943af0 100644 --- a/ql/src/test/results/clientpositive/join27.q.out +++ b/ql/src/test/results/clientpositive/join27.q.out @@ -17,13 +17,15 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) diff --git a/ql/src/test/results/clientpositive/join28.q.out b/ql/src/test/results/clientpositive/join28.q.out index 309bdcd200..58ffed03a7 100644 --- a/ql/src/test/results/clientpositive/join28.q.out +++ b/ql/src/test/results/clientpositive/join28.q.out @@ -23,13 +23,15 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z @@ -70,7 +72,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -115,6 +117,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -131,6 +148,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq.key1, z.value FROM diff --git a/ql/src/test/results/clientpositive/join29.q.out b/ql/src/test/results/clientpositive/join29.q.out index b53143de05..7b69c4fafc 100644 --- a/ql/src/test/results/clientpositive/join29.q.out +++ b/ql/src/test/results/clientpositive/join29.q.out @@ -20,15 +20,17 @@ FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2 - Stage-8 has a backup stage: Stage-2 - Stage-5 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2, Stage-5, Stage-6 - Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-1, Stage-5 , consists of Stage-9, Stage-10, Stage-2 Stage-9 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-10 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-10 Stage-2 - Stage-4 is a root stage + Stage-5 is a root stage STAGE PLANS: Stage: Stage-1 @@ -70,10 +72,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -87,7 +89,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -111,6 +113,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt1, 16), compute_stats(cnt2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -127,7 +144,36 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-9 + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, cnt1, cnt2 + Column Types: string, int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -141,7 +187,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -165,6 +211,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt1, 16), compute_stats(cnt2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -206,8 +267,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt1, 16), compute_stats(cnt2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/join3.q.out b/ql/src/test/results/clientpositive/join3.q.out index fb378f403a..f3d198d049 100644 --- a/ql/src/test/results/clientpositive/join3.q.out +++ b/ql/src/test/results/clientpositive/join3.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -92,6 +94,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -106,6 +123,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join30.q.out b/ql/src/test/results/clientpositive/join30.q.out index 3bd6db16f7..b82e287cfc 100644 --- a/ql/src/test/results/clientpositive/join30.q.out +++ b/ql/src/test/results/clientpositive/join30.q.out @@ -15,13 +15,15 @@ INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_0:x @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join31.q.out b/ql/src/test/results/clientpositive/join31.q.out index 32eab4febb..a36baa12bf 100644 --- a/ql/src/test/results/clientpositive/join31.q.out +++ b/ql/src/test/results/clientpositive/join31.q.out @@ -22,10 +22,12 @@ group by subq1.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-8 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-4, Stage-5 + Stage-5 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -60,7 +62,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_1:y @@ -133,6 +135,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -147,6 +164,35 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.dest_j1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN diff --git a/ql/src/test/results/clientpositive/join32.q.out b/ql/src/test/results/clientpositive/join32.q.out index a191284aca..16ad8a5514 100644 --- a/ql/src/test/results/clientpositive/join32.q.out +++ b/ql/src/test/results/clientpositive/join32.q.out @@ -19,13 +19,15 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:y @@ -72,7 +74,7 @@ STAGE PLANS: 1 _col1 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -141,6 +143,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -321,6 +350,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join33.q.out b/ql/src/test/results/clientpositive/join33.q.out index a191284aca..16ad8a5514 100644 --- a/ql/src/test/results/clientpositive/join33.q.out +++ b/ql/src/test/results/clientpositive/join33.q.out @@ -19,13 +19,15 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:y @@ -72,7 +74,7 @@ STAGE PLANS: 1 _col1 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -141,6 +143,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -321,6 +350,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join34.q.out b/ql/src/test/results/clientpositive/join34.q.out index 67599bc991..2b8ae56ee4 100644 --- a/ql/src/test/results/clientpositive/join34.q.out +++ b/ql/src/test/results/clientpositive/join34.q.out @@ -27,13 +27,15 @@ FROM JOIN src1 x ON (x.key = subq1.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:x @@ -59,7 +61,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -121,6 +123,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false TableScan alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -180,6 +209,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -312,6 +368,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.value FROM diff --git a/ql/src/test/results/clientpositive/join35.q.out b/ql/src/test/results/clientpositive/join35.q.out index 7ece4eff40..12e9f1ea14 100644 --- a/ql/src/test/results/clientpositive/join35.q.out +++ b/ql/src/test/results/clientpositive/join35.q.out @@ -28,11 +28,13 @@ JOIN src1 x ON (x.key = subq1.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1, Stage-4 - Stage-6 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-1, Stage-5 + Stage-7 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-7 Stage-3 depends on stages: Stage-0 - Stage-4 is a root stage + Stage-9 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-7 + Stage-5 is a root stage STAGE PLANS: Stage: Stage-1 @@ -143,7 +145,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:x @@ -169,7 +171,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -221,6 +223,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false TableScan GatherStats: false Union @@ -270,6 +299,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -299,7 +355,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10004 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -400,10 +456,87 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, int + Table: default.dest_j1 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false diff --git a/ql/src/test/results/clientpositive/join36.q.out b/ql/src/test/results/clientpositive/join36.q.out index 43a091f629..9971a9ae9d 100644 --- a/ql/src/test/results/clientpositive/join36.q.out +++ b/ql/src/test/results/clientpositive/join36.q.out @@ -57,13 +57,15 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -86,7 +88,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -119,6 +121,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -135,6 +152,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, int, int + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join37.q.out b/ql/src/test/results/clientpositive/join37.q.out index b0a2ee3dfa..eb50db5c17 100644 --- a/ql/src/test/results/clientpositive/join37.q.out +++ b/ql/src/test/results/clientpositive/join37.q.out @@ -17,13 +17,15 @@ SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join39.q.out b/ql/src/test/results/clientpositive/join39.q.out index c656762675..b552fde024 100644 --- a/ql/src/test/results/clientpositive/join39.q.out +++ b/ql/src/test/results/clientpositive/join39.q.out @@ -17,13 +17,15 @@ SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -72,6 +74,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, key1, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(key1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -88,6 +105,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, key1, val2 + Column Types: string, string, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join4.q.out b/ql/src/test/results/clientpositive/join4.q.out index 0bd1edd6af..b7eaf41124 100644 --- a/ql/src/test/results/clientpositive/join4.q.out +++ b/ql/src/test/results/clientpositive/join4.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join43.q.out b/ql/src/test/results/clientpositive/join43.q.out index 24168ca4fe..fac8a8cb2d 100644 --- a/ql/src/test/results/clientpositive/join43.q.out +++ b/ql/src/test/results/clientpositive/join43.q.out @@ -209,46 +209,46 @@ from ( ) list POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: purchase_history - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + alias: cart_history + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s is not null (type: boolean) - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), time (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) TableScan - alias: cart_history - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + alias: purchase_history + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s is not null (type: boolean) - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), time (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator @@ -257,14 +257,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col1 > _col3) (type: boolean) + predicate: (_col3 > _col1) (type: boolean) Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(_col3) - keys: _col0 (type: string), _col1 (type: int) + aggregations: max(_col1) + keys: _col2 (type: string), _col3 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -275,7 +275,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -306,16 +306,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - TableScan alias: events Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -330,22 +324,32 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0 + 0 _col0 (type: string), _col1 (type: int) + 1 _col0 (type: string), _col2 (type: int) + outputColumnNames: _col2 Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col2 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -427,46 +431,46 @@ from ( ) list POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: purchase_history - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + alias: cart_history + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s is not null (type: boolean) - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), time (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) TableScan - alias: cart_history - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + alias: purchase_history + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s is not null (type: boolean) - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), time (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator @@ -475,14 +479,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col1 > _col3) (type: boolean) + predicate: (_col3 > _col1) (type: boolean) Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(_col3) - keys: _col0 (type: string), _col1 (type: int) + aggregations: max(_col1) + keys: _col2 (type: string), _col3 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -493,7 +497,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -520,17 +524,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - TableScan alias: events Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -546,17 +543,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col3 (type: int) Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4, _col5 + 0 _col0 (type: string), _col3 (type: int) + 1 _col0 (type: string), _col2 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col6 Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col4 (type: string), _col5 (type: int) + expressions: _col4 (type: string), _col5 (type: int), _col6 (type: int), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/join5.q.out b/ql/src/test/results/clientpositive/join5.q.out index d981c742ac..8925e7e000 100644 --- a/ql/src/test/results/clientpositive/join5.q.out +++ b/ql/src/test/results/clientpositive/join5.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join6.q.out b/ql/src/test/results/clientpositive/join6.q.out index 77c8c3a322..19b12e83c5 100644 --- a/ql/src/test/results/clientpositive/join6.q.out +++ b/ql/src/test/results/clientpositive/join6.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join7.q.out b/ql/src/test/results/clientpositive/join7.q.out index fec67b2651..ac61b1a354 100644 --- a/ql/src/test/results/clientpositive/join7.q.out +++ b/ql/src/test/results/clientpositive/join7.q.out @@ -50,6 +50,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -126,6 +128,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -140,6 +157,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join8.q.out b/ql/src/test/results/clientpositive/join8.q.out index 124b47ec95..4f5f041fd7 100644 --- a/ql/src/test/results/clientpositive/join8.q.out +++ b/ql/src/test/results/clientpositive/join8.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -101,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -115,6 +132,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join9.q.out b/ql/src/test/results/clientpositive/join9.q.out index a96f341c78..36b64aca0d 100644 --- a/ql/src/test/results/clientpositive/join9.q.out +++ b/ql/src/test/results/clientpositive/join9.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -210,6 +212,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -243,6 +272,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join_map_ppr.q.out b/ql/src/test/results/clientpositive/join_map_ppr.q.out index a4d414089e..1759c1c0af 100644 --- a/ql/src/test/results/clientpositive/join_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/join_map_ppr.q.out @@ -27,6 +27,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-4 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 @@ -131,6 +132,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -186,6 +203,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator @@ -228,6 +274,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -605,6 +659,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-4 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 @@ -688,7 +743,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -709,6 +764,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -764,6 +835,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator @@ -783,7 +883,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -806,6 +906,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -820,7 +928,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -850,7 +958,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -871,7 +979,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -907,7 +1015,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -937,7 +1045,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -958,7 +1066,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 diff --git a/ql/src/test/results/clientpositive/lb_fs_stats.q.out b/ql/src/test/results/clientpositive/lb_fs_stats.q.out index b07192b895..9bc96e4685 100644 --- a/ql/src/test/results/clientpositive/lb_fs_stats.q.out +++ b/ql/src/test/results/clientpositive/lb_fs_stats.q.out @@ -46,7 +46,7 @@ Database: default Table: test_tab #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 4812 diff --git a/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out b/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out index 35e68278c7..7e25f6f2c0 100644 --- a/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out +++ b/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out @@ -237,9 +237,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -279,6 +283,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -308,14 +327,49 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Select Operator @@ -337,6 +391,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -348,6 +417,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_3 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/lineage1.q.out b/ql/src/test/results/clientpositive/lineage1.q.out index 6c8a22fa9c..c66357e9df 100644 --- a/ql/src/test/results/clientpositive/lineage1.q.out +++ b/ql/src/test/results/clientpositive/lineage1.q.out @@ -39,6 +39,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -111,6 +112,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE @@ -126,6 +140,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -149,6 +189,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_l1 + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out index f70f8b2b9a..ff0ad8be14 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -65,6 +66,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -167,6 +187,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -200,6 +254,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -240,7 +302,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -285,7 +347,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -357,7 +419,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out index 6920dcc7ca..e3b6f36133 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +140,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,6 +207,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from src PREHOOK: type: QUERY @@ -195,7 +257,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 @@ -238,7 +300,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out index 55acbb7f5a..990ba9fb0a 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: col1, col2, col3, col4, col5, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(col1, 16), compute_stats(col2, 16), compute_stats(col3, 16), compute_stats(col4, 16), compute_stats(col5, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +140,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types struct:struct:struct:struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,6 +207,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: col1, col2, col3, col4, col5 + Column Types: string, string, string, string, string + Table: default.list_bucketing_mul_col + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') select 1, key, 1, value, 1 from src PREHOOK: type: QUERY @@ -201,7 +263,7 @@ Database: default Table: list_bucketing_mul_col #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\"}} numFiles 4 numRows 500 rawDataSize 6312 @@ -246,7 +308,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 @@ -338,7 +400,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out index 3a1d2a436a..845546768a 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) + outputColumnNames: col1, col2, col3, col4, col5, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(col1, 16), compute_stats(col2, 16), compute_stats(col3, 16), compute_stats(col4, 16), compute_stats(col5, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +140,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types struct:struct:struct:struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,6 +207,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: col1, col2, col3, col4, col5 + Column Types: string, string, string, string, string + Table: default.list_bucketing_mul_col + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') select 1, key, 1, value, 1 from src PREHOOK: type: QUERY @@ -201,7 +263,7 @@ Database: default Table: list_bucketing_mul_col #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\"}} numFiles 4 numRows 500 rawDataSize 6312 @@ -246,7 +308,7 @@ STAGE PLANS: ds 2008-04-08 hr 2013-01-23+18:00:99 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out index f827991243..214ff57c57 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -65,6 +66,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -116,6 +133,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -149,6 +195,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing + Is Table Level Stats: true + PREHOOK: query: insert overwrite table list_bucketing select * from src PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -177,7 +231,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -283,7 +337,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -304,7 +358,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out index 01bc19cbbc..7b72c66ed7 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -250,7 +312,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -311,7 +373,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out index 52646a2609..de4e086dab 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -59,6 +60,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +181,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -194,6 +248,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds='2008-04-08', hr='11') select key, value from srcpart where ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -231,7 +293,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 10624 @@ -303,7 +365,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out index 84ada2bd49..9e8560a8c4 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -250,7 +312,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -286,6 +348,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -331,6 +394,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -433,6 +515,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -474,6 +590,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -624,7 +748,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 9624 @@ -685,7 +809,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out index 71d7e16409..3e55d2a6ae 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -65,6 +66,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -167,6 +187,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -200,6 +254,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -240,7 +302,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -285,7 +347,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -362,7 +424,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -408,7 +470,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out index 8e20ff1301..5217a3f9fc 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -254,7 +316,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -297,7 +359,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 984 rawDataSize 9488 @@ -333,6 +395,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -379,6 +442,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -481,6 +563,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -523,6 +639,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -681,7 +805,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 16 rawDataSize 136 @@ -724,7 +848,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 984 rawDataSize 9488 @@ -785,7 +909,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -831,7 +955,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out index 58bf240f1b..286a0457e4 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -254,7 +316,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -297,7 +359,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 984 rawDataSize 9488 @@ -333,6 +395,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -379,6 +442,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -481,6 +563,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -523,6 +639,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -681,7 +805,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 16 rawDataSize 136 @@ -724,7 +848,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 984 rawDataSize 9488 @@ -785,7 +909,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -831,7 +955,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out index b38d332e09..071f0deada 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -254,7 +316,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -297,7 +359,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 984 rawDataSize 9488 @@ -414,7 +476,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out index 624e3aca41..221b3c985d 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -250,7 +312,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -286,6 +348,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -331,6 +394,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -433,6 +515,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -474,6 +590,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -624,7 +748,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 9624 @@ -685,7 +809,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out index 17c7afe411..98b9b1304a 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out @@ -47,7 +47,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -97,7 +97,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -181,7 +181,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -266,7 +266,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -349,7 +349,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out index 5a326f667c..4cafefa812 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out @@ -47,7 +47,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -97,7 +97,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -181,7 +181,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -268,7 +268,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out index 3ff221da35..9931afd435 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out @@ -43,7 +43,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -103,7 +103,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -168,7 +168,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -211,7 +211,7 @@ STAGE PLANS: ds 1 hr 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -322,7 +322,7 @@ STAGE PLANS: ds 1 hr 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -408,7 +408,7 @@ STAGE PLANS: ds 1 hr 3 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out index 357ae7bdaf..0a9b906906 100644 --- a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out @@ -86,6 +86,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"a":"true"}} bucket_count 16 bucket_field_name a column.name.delimiter , @@ -109,6 +110,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"a":"true"}} bucket_count 16 bucket_field_name a column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out b/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out index 0d5ba01960..f0cf74ca81 100644 --- a/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out +++ b/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out @@ -89,7 +89,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 @@ -241,7 +241,7 @@ Database: default Table: src_orc_merge_test_part_stat #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 @@ -290,7 +290,7 @@ Database: default Table: src_orc_merge_test_part_stat #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_10.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_10.q.out new file mode 100644 index 0000000000..09529d26e5 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_10.q.out @@ -0,0 +1,452 @@ +PREHOOK: query: drop table p +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table p +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@p +POSTHOOK: query: CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@p +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 tinyint +c2 smallint + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\",\"insert_num\":\"true\"}} + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into p values (1,22,333) +PREHOOK: type: QUERY +PREHOOK: Output: default@p +POSTHOOK: query: insert into p values (1,22,333) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@p +POSTHOOK: Lineage: p.c1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: p.c2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: p.insert_num EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 tinyint +c2 smallint + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\",\"insert_num\":\"true\"}} + numFiles 1 + numRows 1 + rawDataSize 8 + totalSize 9 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: alter table p replace columns (insert_num int, c1 STRING, c2 STRING) +PREHOOK: type: ALTERTABLE_REPLACECOLS +PREHOOK: Input: default@p +PREHOOK: Output: default@p +POSTHOOK: query: alter table p replace columns (insert_num int, c1 STRING, c2 STRING) +POSTHOOK: type: ALTERTABLE_REPLACECOLS +POSTHOOK: Input: default@p +POSTHOOK: Output: default@p +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 string +c2 string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} +#### A masked pattern was here #### + numFiles 1 + numRows 1 + rawDataSize 8 + totalSize 9 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted p insert_num +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p insert_num +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +insert_num int 1 1 0 1 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} +PREHOOK: query: desc formatted p c1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p c1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +c1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} +PREHOOK: query: insert into p values (2,11,111) +PREHOOK: type: QUERY +PREHOOK: Output: default@p +POSTHOOK: query: insert into p values (2,11,111) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@p +POSTHOOK: Lineage: p.c1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: p.c2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: p.insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 string +c2 string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} +#### A masked pattern was here #### + numFiles 2 + numRows 2 + rawDataSize 16 + totalSize 18 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted p insert_num +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p insert_num +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +insert_num int 1 2 0 1 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} +PREHOOK: query: desc formatted p c1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p c1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +c1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} +PREHOOK: query: drop table p +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@p +PREHOOK: Output: default@p +POSTHOOK: query: drop table p +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@p +POSTHOOK: Output: default@p +PREHOOK: query: CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@p +POSTHOOK: query: CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@p +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 tinyint +c2 smallint + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\",\"insert_num\":\"true\"}} + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into p values (1,22,333) +PREHOOK: type: QUERY +PREHOOK: Output: default@p +POSTHOOK: query: insert into p values (1,22,333) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@p +POSTHOOK: Lineage: p.c1 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: p.c2 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: p.insert_num EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 tinyint +c2 smallint + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1 + rawDataSize 8 + totalSize 9 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: alter table p replace columns (insert_num int, c1 STRING, c2 STRING) +PREHOOK: type: ALTERTABLE_REPLACECOLS +PREHOOK: Input: default@p +PREHOOK: Output: default@p +POSTHOOK: query: alter table p replace columns (insert_num int, c1 STRING, c2 STRING) +POSTHOOK: type: ALTERTABLE_REPLACECOLS +POSTHOOK: Input: default@p +POSTHOOK: Output: default@p +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 string +c2 string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +#### A masked pattern was here #### + numFiles 1 + numRows 1 + rawDataSize 8 + totalSize 9 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted p insert_num +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p insert_num +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +insert_num int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: desc formatted p c1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p c1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +c1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: insert into p values (2,11,111) +PREHOOK: type: QUERY +PREHOOK: Output: default@p +POSTHOOK: query: insert into p values (2,11,111) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@p +POSTHOOK: Lineage: p.c1 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: p.c2 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: p.insert_num EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 string +c2 string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +#### A masked pattern was here #### + numFiles 2 + numRows 2 + rawDataSize 16 + totalSize 18 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted p insert_num +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p insert_num +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +insert_num int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: desc formatted p c1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p c1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +c1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} diff --git a/ql/src/test/results/clientpositive/llap/auto_join1.q.out b/ql/src/test/results/clientpositive/llap/auto_join1.q.out index 6a0a1d5d09..e85e4f1074 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join1.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -26,6 +27,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -81,8 +83,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -100,6 +130,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out index 841ef1456d..d05cc8cca9 100644 --- a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out @@ -64,40 +64,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -107,10 +107,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -174,25 +174,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -200,17 +200,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reducer 2 Execution mode: llap @@ -219,17 +219,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -238,10 +238,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -330,25 +330,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -356,43 +356,43 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -400,18 +400,18 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Reducer 2 @@ -422,12 +422,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -439,14 +439,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -459,12 +459,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -547,40 +547,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -590,10 +590,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -664,40 +664,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -707,10 +707,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -805,40 +805,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -848,10 +848,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -934,40 +934,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -977,10 +977,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1042,38 +1042,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1085,15 +1085,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1102,10 +1102,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1162,40 +1162,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1205,10 +1205,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1271,36 +1271,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1309,15 +1309,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1327,10 +1327,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1409,40 +1409,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1452,10 +1452,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1533,6 +1533,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1540,30 +1542,34 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1571,32 +1577,88 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: llap + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1614,6 +1676,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -1747,6 +1823,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1755,32 +1833,34 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1788,53 +1868,109 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1852,6 +1988,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out index ab94c37fed..c1459d53ef 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out @@ -80,14 +80,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -96,15 +96,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 5 - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -112,14 +112,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -128,15 +128,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 5 - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -144,24 +144,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 3 @@ -171,10 +171,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -248,34 +248,34 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int) mode: final outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -284,15 +284,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 0 Map 1 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -303,10 +303,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out index 9c37848896..0bb5b0fedd 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out @@ -71,6 +71,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -79,21 +81,23 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -102,54 +106,110 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + 1 Map 4 + Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -167,6 +227,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -287,6 +361,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -295,77 +371,122 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Map Operator Tree: + TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reducer 2 Execution mode: llap - LLAP IO: no inputs + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -383,6 +504,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -503,6 +638,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -511,77 +648,122 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Map Operator Tree: + TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reducer 2 Execution mode: llap - LLAP IO: no inputs + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -599,6 +781,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out index 21f1b3faff..72d2c62e5b 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out @@ -57,11 +57,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -70,15 +70,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -86,16 +86,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -105,10 +105,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -164,27 +164,27 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 189 Data size: 1891 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 189 Data size: 756 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 189 Data size: 1891 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 189 Data size: 756 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -193,15 +193,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 0 Map 1 - Statistics: Num rows: 207 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 189 Data size: 1512 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -212,10 +212,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out index 03ff5a6659..91b1d8ea67 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out @@ -57,11 +57,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -70,15 +70,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -86,16 +86,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -105,10 +105,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -143,27 +143,27 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -172,15 +172,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 0 Map 1 - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -191,10 +191,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out index 9e6053675b..34bea411b2 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out @@ -84,46 +84,30 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan @@ -143,6 +127,25 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -152,27 +155,43 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -211,46 +230,30 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan @@ -270,6 +273,25 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -279,27 +301,43 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -338,46 +376,30 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan @@ -397,6 +419,25 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -406,27 +447,43 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -465,46 +522,30 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan @@ -524,6 +565,25 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -533,27 +593,43 @@ STAGE PLANS: keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1219 Data size: 4876 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1219 Data size: 4876 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3109 Data size: 24872 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -598,36 +674,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -636,15 +712,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3253 Data size: 26024 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -654,10 +730,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -696,63 +772,66 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -764,27 +843,43 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -823,46 +918,30 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan @@ -882,6 +961,25 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -891,27 +989,43 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -950,46 +1064,30 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan @@ -1009,6 +1107,25 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1018,27 +1135,43 @@ STAGE PLANS: keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1219 Data size: 4876 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1219 Data size: 4876 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3109 Data size: 24872 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1083,36 +1216,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1121,15 +1254,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3253 Data size: 26024 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1139,10 +1272,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1181,63 +1314,66 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1249,27 +1385,43 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out index bdb30d735b..65039a2d6c 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out @@ -65,14 +65,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -81,15 +81,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -97,19 +97,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -119,10 +119,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -180,14 +180,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -197,18 +197,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -216,19 +216,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -239,10 +239,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -316,14 +316,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -333,36 +333,36 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -372,17 +372,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -391,10 +391,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -484,14 +484,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -501,18 +501,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1921,19 +1921,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2415,7 +2415,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2425,14 +2425,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 1 Reducer 5 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2445,12 +2445,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -2534,14 +2534,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2550,15 +2550,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2566,19 +2566,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2588,10 +2588,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2663,14 +2663,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2679,15 +2679,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2695,19 +2695,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2717,10 +2717,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2816,14 +2816,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2832,15 +2832,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2848,19 +2848,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2870,10 +2870,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2957,14 +2957,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2973,15 +2973,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2989,19 +2989,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3011,10 +3011,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3126,10 +3126,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3185,14 +3185,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -3201,15 +3201,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -3217,19 +3217,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3239,10 +3239,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3308,14 +3308,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -3362,19 +3362,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3384,10 +3384,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3521,10 +3521,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucket2.q.out b/ql/src/test/results/clientpositive/llap/bucket2.q.out index e0c92ced9f..d446f3c21e 100644 --- a/ql/src/test/results/clientpositive/llap/bucket2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket2.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -140,6 +141,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -177,6 +213,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket2_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/bucket3.q.out b/ql/src/test/results/clientpositive/llap/bucket3.q.out index 8e6d85ca80..3bc765e6b6 100644 --- a/ql/src/test/results/clientpositive/llap/bucket3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket3.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -26,6 +27,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -138,6 +140,61 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: true + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -174,6 +231,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + Is Table Level Stats: false + PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/bucket4.q.out b/ql/src/test/results/clientpositive/llap/bucket4.q.out index 5fbffc96a9..fb55e81f51 100644 --- a/ql/src/test/results/clientpositive/llap/bucket4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket4.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -142,6 +143,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -180,6 +216,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket4_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket4_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/bucket5.q.out b/ql/src/test/results/clientpositive/llap/bucket5.q.out index 0b5a14d6ff..0e90d96884 100644 --- a/ql/src/test/results/clientpositive/llap/bucket5.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket5.q.out @@ -29,6 +29,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-4, Stage-5 + Stage-12 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8 @@ -43,7 +45,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -173,10 +177,57 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: Select Operator expressions: UDFToInteger(KEY.reducesinkkey0) (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 @@ -213,6 +264,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 5 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Dependency Collection @@ -251,6 +349,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucketed_table + Is Table Level Stats: true + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.unbucketed_table + Is Table Level Stats: true + Stage: Stage-1 Move Operator tables: @@ -514,7 +628,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} SORTBUCKETCOLSPREFIX TRUE numFiles 2 numRows 500 diff --git a/ql/src/test/results/clientpositive/llap/bucket6.q.out b/ql/src/test/results/clientpositive/llap/bucket6.q.out index 20895f8a9f..fe2d3a6679 100644 --- a/ql/src/test/results/clientpositive/llap/bucket6.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket6.q.out @@ -17,6 +17,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -24,6 +25,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -58,6 +60,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -75,6 +105,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_bucket + PREHOOK: query: insert into table src_bucket select key,value from srcpart PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out index d68797fbf4..0d7fede597 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out @@ -63,22 +63,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -91,11 +91,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -104,13 +104,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -191,22 +191,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -219,11 +219,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -232,13 +232,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -292,22 +292,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: length(key) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -320,13 +320,13 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -373,22 +373,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: abs(length(key)) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -401,13 +401,13 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -455,22 +455,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -483,11 +483,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -496,13 +496,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -557,22 +557,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -585,11 +585,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -598,13 +598,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -658,22 +658,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -685,10 +685,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1043,22 +1043,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1070,10 +1070,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1158,23 +1158,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) bucketGroup: true keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -1187,11 +1187,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1200,13 +1200,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1261,22 +1261,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -1289,11 +1289,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1302,13 +1302,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1464,23 +1464,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) bucketGroup: true keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -1493,11 +1493,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1506,13 +1506,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1567,22 +1567,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string), key (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col1 (type: string), _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint) Execution mode: llap @@ -1595,15 +1595,15 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1612,13 +1612,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucket_many.q.out b/ql/src/test/results/clientpositive/llap/bucket_many.q.out index b78cbaa0c3..c9015c11cb 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_many.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_many.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -26,6 +27,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -140,6 +142,53 @@ STAGE PLANS: TotalFiles: 256 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -177,6 +226,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket_many + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket_many select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out index 042c60bf17..a16a1d784c 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out @@ -132,19 +132,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -152,14 +152,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -169,14 +169,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -217,39 +217,39 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tab_part - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -259,11 +259,11 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -272,15 +272,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 4 - Statistics: Num rows: 275 Data size: 5121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 308 Data size: 2464 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -289,10 +289,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -346,101 +346,105 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 617 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 617 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 1573 Data size: 12584 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 4 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: d - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -498,101 +502,105 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 1 (CUSTOM_EDGE), Map 2 (CUSTOM_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 617 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 617 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 1573 Data size: 12584 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + alias: d + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 0 Map 2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Reducer 4 + Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -680,14 +688,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -697,14 +705,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 266 Data size: 25289 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 266 Data size: 25289 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 266 Data size: 25289 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -893,22 +901,58 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 3 (CUSTOM_EDGE) + Map 2 <- Map 4 (CUSTOM_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 298 Data size: 30694 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 298 Data size: 30694 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 298 Data size: 30694 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan alias: x - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -917,66 +961,46 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + 1 Map 4 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), substr(_col1, 5) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: y - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -984,32 +1008,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 2476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 2476 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) Stage: Stage-0 Fetch Operator @@ -1041,14 +1050,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1057,18 +1066,18 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col4 input vertices: 1 Map 2 2 Map 3 - Statistics: Num rows: 1100 Data size: 20486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1100 Data size: 20486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 20486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1078,40 +1087,40 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1138,21 +1147,22 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (CUSTOM_EDGE) + Map 1 <- Map 3 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: x - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1161,70 +1171,77 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 1 Map 3 + Statistics: Num rows: 617 Data size: 58615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 617 Data size: 58615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: y - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1285,14 +1302,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1302,14 +1319,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1385,14 +1402,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1402,14 +1419,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1455,60 +1472,66 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 565 Data size: 105090 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 565 Data size: 105090 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1562,19 +1585,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1582,14 +1605,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1599,14 +1622,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1633,93 +1656,100 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1168 Data size: 9344 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1744,79 +1774,86 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 47094 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 44528 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 242 Data size: 44528 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 242 Data size: 44528 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 44528 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Dynamic Partitioning Event Operator Target column: ds (string) - Target Input: b + Target Input: a Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL - Target Vertex: Map 2 + Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 Execution mode: llap LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 97312 Basic stats: COMPLETE Column stats: PARTIAL - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: key (type: int), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: PARTIAL - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 0 Map 1 - Statistics: Num rows: 60500 Data size: 12826000 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60500 Data size: 484000 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 60500 Data size: 484000 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 2 Execution mode: llap - LLAP IO: no inputs + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: string) + 1 _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1863,14 +1900,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1879,29 +1916,29 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1911,10 +1948,10 @@ STAGE PLANS: aggregations: count() mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1982,14 +2019,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1998,29 +2035,29 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2030,10 +2067,10 @@ STAGE PLANS: aggregations: count() mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out index 123276795a..b67b2029e3 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out @@ -121,93 +121,100 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1168 Data size: 9344 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -254,30 +261,30 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -289,29 +296,36 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 590 Data size: 109740 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 590 Data size: 109740 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 590 Data size: 109740 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -341,14 +355,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tab_part - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key > 1) and (key > 2)) (type: boolean) - Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -358,10 +372,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 60 Data size: 1126 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1096 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 1126 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1096 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -372,19 +386,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tab_part - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key > 2) and (key > 1)) (type: boolean) - Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs @@ -416,14 +430,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tab_part - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1) (type: boolean) - Statistics: Num rows: 166 Data size: 3091 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 3091 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -433,10 +447,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 182 Data size: 3400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 3400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -447,19 +461,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tab_part - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key > 2) and (key > 1)) (type: boolean) - Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs @@ -491,33 +505,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tab_part - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key > 1) and (key > 2)) (type: boolean) - Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: tab_part - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 2) (type: boolean) - Statistics: Num rows: 166 Data size: 3091 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 3091 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -527,10 +541,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 0 Map 1 - Statistics: Num rows: 182 Data size: 3400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 3400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -557,46 +571,59 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 117 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 117 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 242 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 242 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -606,24 +633,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 121 Data size: 2251 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 117 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 117 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -644,46 +659,47 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: value (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 10192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 10192 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -694,28 +710,35 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 121 Data size: 2251 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 112 Data size: 10192 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 112 Data size: 10192 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 231 Data size: 42042 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 231 Data size: 42042 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 231 Data size: 42042 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out index feb3bbcfc4..92b0b73fed 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out @@ -393,13 +393,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -478,7 +480,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -605,6 +607,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -641,6 +690,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b @@ -759,13 +816,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -844,7 +903,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -950,7 +1009,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -971,6 +1030,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -984,7 +1090,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1007,6 +1113,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out index bda8070ba5..ce206173ae 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out @@ -105,13 +105,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -192,7 +194,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -319,6 +321,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -355,6 +404,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -477,13 +534,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -564,7 +623,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -670,7 +729,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -691,6 +750,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -704,7 +810,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -727,6 +833,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -866,13 +980,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -953,7 +1069,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1108,7 +1224,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1129,6 +1245,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 7246 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1142,7 +1305,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1165,6 +1328,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out index ae17c1b707..273935f11c 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out @@ -129,13 +129,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -216,7 +218,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -343,6 +345,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -379,6 +428,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b @@ -501,13 +558,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -588,7 +647,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -694,7 +753,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -715,6 +774,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -728,7 +834,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -751,6 +857,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out index aab43431e6..8fe4087015 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out @@ -129,13 +129,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -214,7 +216,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -339,6 +341,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -375,6 +424,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin b @@ -485,13 +542,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -570,7 +629,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -674,7 +733,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -695,6 +754,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -708,7 +814,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -731,6 +837,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin b diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out index e4fed1124b..9d441032c0 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out @@ -93,6 +93,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -101,25 +102,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -127,14 +129,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -144,16 +146,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3111 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -163,15 +165,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 17 Data size: 4641 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -191,6 +229,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -272,6 +317,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -280,25 +326,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 20 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 20 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 5540 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 5540 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -306,14 +353,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -323,16 +370,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6405 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -342,15 +389,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 35 Data size: 9555 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -370,6 +453,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -475,6 +565,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -483,25 +574,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -509,14 +601,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 168 Data size: 2816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 168 Data size: 2816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 168 Data size: 2816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 46704 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 @@ -526,16 +618,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 184 Data size: 3097 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6405 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 184 Data size: 3097 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 184 Data size: 3097 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -545,15 +637,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 184 Data size: 3097 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 184 Data size: 3097 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 35 Data size: 9555 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -573,6 +701,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -684,6 +819,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -692,25 +828,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -718,14 +855,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -735,16 +872,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3111 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -754,15 +891,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 17 Data size: 4641 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -782,6 +955,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM @@ -875,6 +1055,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -883,25 +1064,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), concat(value, value) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -909,14 +1091,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), concat(value, value) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -926,16 +1108,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 6324 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -945,15 +1127,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 17 Data size: 4641 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -973,6 +1191,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.v1, b.v2) FROM @@ -1066,6 +1291,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1074,25 +1300,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1100,14 +1327,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1117,16 +1344,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3111 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1136,15 +1363,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 17 Data size: 4641 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1164,6 +1427,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key+a.key, concat(a.value, b.value) FROM diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out index 67e925ad99..3c04d4b206 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out @@ -71,6 +71,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -79,25 +80,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -105,14 +107,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -122,16 +124,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -141,15 +143,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -169,6 +207,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key2, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -257,6 +302,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -265,25 +311,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -291,14 +338,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -308,16 +355,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -327,15 +374,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -355,6 +438,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq1.key, subq1.key2, subq1.value from ( @@ -443,6 +533,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -451,25 +542,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -477,14 +569,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -494,16 +586,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -513,15 +605,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -541,6 +669,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq1.key2, subq1.key, subq1.value from @@ -564,6 +699,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -572,25 +708,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -598,14 +735,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -615,16 +752,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -634,15 +771,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -662,6 +835,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.key, subq2.key2, subq2.value from @@ -691,6 +871,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -699,25 +880,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -725,14 +907,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -742,16 +924,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -761,15 +943,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -789,6 +1007,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.key, subq2.key2, subq2.value from ( @@ -895,6 +1120,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -903,25 +1129,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -929,14 +1156,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -946,16 +1173,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -965,15 +1192,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -993,6 +1256,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.k2, subq2.k1, subq2.value from ( @@ -1109,6 +1379,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1117,25 +1388,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1143,14 +1415,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1160,16 +1432,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: -- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1179,15 +1451,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1207,3 +1515,10 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table4 + diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out index 8691d0d34e..2ca6e9f539 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out @@ -71,6 +71,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -79,25 +80,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -105,14 +107,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 716 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -122,16 +124,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 915 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -141,15 +143,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 5 Data size: 1365 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -169,6 +207,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -254,6 +299,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -262,25 +308,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -288,14 +335,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -305,16 +352,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 915 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -324,15 +371,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 5 Data size: 1365 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -352,6 +435,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM @@ -443,6 +533,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -451,25 +542,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean) - Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -477,14 +569,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean) - Statistics: Num rows: 28 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -494,16 +586,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 30 Data size: 515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 915 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -513,15 +605,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 5 Data size: 1365 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -541,6 +669,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out index 44f9d68a7e..58ab6ad0c9 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out @@ -36,7 +36,7 @@ PREHOOK: query: insert into table dest1 select * from src2 PREHOOK: type: QUERY PREHOOK: Input: default@src2 PREHOOK: Output: default@dest1 -{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select key k, dest1.value from dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -467,20 +467,20 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"src1.value is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"src2.value2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select * from src1 where length(key) > 2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 @@ -530,7 +530,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: drop table if exists dest_l1 PREHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE @@ -552,7 +552,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_l1 -{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery1:_u1-subquery1:p1.key = j-subquery1:_u1-subquery1:t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery2:_u1-subquery2:p2.key = j-subquery2:_u1-subquery2:t2.key)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery1:_u1-subquery1:p1.key = j-subquery1:_u1-subquery1:t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery2:_u1-subquery2:p2.key = j-subquery2:_u1-subquery2:t2.key)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(key), 16)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(value, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} PREHOOK: query: drop table if exists emp PREHOOK: type: DROPTABLE PREHOOK: query: drop table if exists dept @@ -593,7 +593,7 @@ PREHOOK: Input: default@dept PREHOOK: Input: default@emp PREHOOK: Input: default@project PREHOOK: Output: default@tgt -{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept.dept_name, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 16)","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 16)","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project.project_id, 16)","edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute_stats(default.project.project_name, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile @@ -603,7 +603,7 @@ PREHOOK: Output: default@dest_l2 PREHOOK: query: insert into dest_l2 values(0, 1, 100, 10000) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l2 -{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(tmp_values_col1), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(UDFToByte(tmp_values_col2), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(UDFToInteger(tmp_values_col3), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToLong(tmp_values_col4), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} PREHOOK: query: select * from ( select c1 + c2 x from dest_l2 union all @@ -623,7 +623,7 @@ PREHOOK: Output: default@dest_l3 PREHOOK: query: insert into dest_l3 values(0, "s1", "s2", 15) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l3 -{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(tmp_values_col1), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col2, 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col3, 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToInteger(tmp_values_col4), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: select sum(a.c1) over (partition by a.c1 order by a.id) from dest_l2 a where a.c2 != 10 diff --git a/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out b/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out index 6d941fd4f2..5d213f6870 100644 --- a/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out +++ b/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out @@ -36,7 +36,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}} numFiles 1 numRows 3 rawDataSize 21 @@ -79,7 +79,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}} numFiles 1 numRows 3 rawDataSize 21 @@ -115,56 +115,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select max(year) from calendar POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: calendar - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: year (type: int) - outputColumnNames: year - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(year) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -280,7 +236,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}} numFiles 2 numRows 4 rawDataSize 28 @@ -302,56 +258,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select max(year) from calendar POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: calendar - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: year (type: int) - outputColumnNames: year - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: max(year) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -369,56 +281,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select max(month) from calendar POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: calendar - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: month (type: int) - outputColumnNames: month - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: max(month) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -459,7 +327,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"year\":\"true\"}} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}} numFiles 2 numRows 4 rawDataSize 28 @@ -504,56 +372,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select max(month) from calendar POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: calendar - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: month (type: int) - outputColumnNames: month - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: max(month) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -670,7 +494,7 @@ Database: default Table: calendarp #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"year\":\"true\"}} numFiles 1 numRows 3 rawDataSize 12 @@ -692,68 +516,22 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select max(year) from calendarp where p=1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: calendarp - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: year (type: int) - outputColumnNames: year - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(year) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink PREHOOK: query: select max(year) from calendarp where p=1 PREHOOK: type: QUERY PREHOOK: Input: default@calendarp -PREHOOK: Input: default@calendarp@p=1 #### A masked pattern was here #### POSTHOOK: query: select max(year) from calendarp where p=1 POSTHOOK: type: QUERY POSTHOOK: Input: default@calendarp -POSTHOOK: Input: default@calendarp@p=1 #### A masked pattern was here #### 2012 PREHOOK: query: analyze table calendarp partition (p=1) compute statistics for columns @@ -850,7 +628,7 @@ Database: default Table: calendarp #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"year\":\"true\"}} numFiles 2 numRows 4 rawDataSize 16 @@ -872,68 +650,22 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select max(year) from calendarp where p=1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: calendarp - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: year (type: int) - outputColumnNames: year - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: max(year) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink PREHOOK: query: select max(year) from calendarp where p=1 PREHOOK: type: QUERY PREHOOK: Input: default@calendarp -PREHOOK: Input: default@calendarp@p=1 #### A masked pattern was here #### POSTHOOK: query: select max(year) from calendarp where p=1 POSTHOOK: type: QUERY POSTHOOK: Input: default@calendarp -POSTHOOK: Input: default@calendarp@p=1 #### A masked pattern was here #### 2015 PREHOOK: query: create table t (key string, value string) diff --git a/ql/src/test/results/clientpositive/llap/column_access_stats.q.out b/ql/src/test/results/clientpositive/llap/column_access_stats.q.out index c56c818617..0a98f84dd2 100644 --- a/ql/src/test/results/clientpositive/llap/column_access_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/column_access_stats.q.out @@ -406,19 +406,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -431,10 +431,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -538,19 +538,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((UDFToDouble(val) = 3.0) and key is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -564,10 +564,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 374 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 374 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -639,19 +639,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((UDFToDouble(key) = 6.0) and val is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: val (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -664,10 +664,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 187 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -753,38 +753,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: t3 - Statistics: Num rows: 5 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -798,12 +798,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -814,10 +814,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out b/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out index d55cf30331..2e6ede976c 100644 --- a/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out +++ b/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out @@ -36,7 +36,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1 rawDataSize 170 @@ -74,22 +74,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Statistics Aggregation Key Prefix: default.s/ GatherStats: true Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 16), compute_stats(value, 16) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col0 (type: struct), _col1 (type: struct) auto parallelism: false @@ -104,7 +104,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -125,7 +125,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -154,13 +154,13 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -365,7 +365,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 #### A masked pattern was here #### name default.spart @@ -409,7 +409,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 #### A masked pattern was here #### name default.spart @@ -770,7 +770,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 #### A masked pattern was here #### name default.spart @@ -974,7 +974,7 @@ Database: default Table: spart #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1 rawDataSize 170 diff --git a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out index dc50fb7fc1..4a8f4d41a5 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out @@ -98,18 +98,18 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key int from deserializer +key int 27 484 0 18 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string from deserializer +value string 0 18 6.8 7 from deserializer PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats @@ -152,18 +152,18 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key int from deserializer +key int 27 495 0 28 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string from deserializer +value string 0 18 6.833333333333333 7 from deserializer PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats @@ -210,36 +210,36 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key int from deserializer +key int 15 495 0 43 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string from deserializer +value string 0 34 6.825 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key int from deserializer +key int 15 495 0 51 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string from deserializer +value string 0 53 6.883333333333334 7 from deserializer PREHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats diff --git a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out index 4bdb186dfa..037a419b9c 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out +++ b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out @@ -62,30 +62,30 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Select Operator [SEL_11] (rows=5 width=22) + Select Operator [SEL_11] (rows=1 width=185) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_17] (rows=5 width=22) + Merge Join Operator [MERGEJOIN_17] (rows=1 width=94) Conds:RS_8._col3=RS_9._col0(Left Semi),Output:["_col0","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:_col3 - Select Operator [SEL_2] (rows=5 width=20) + Select Operator [SEL_2] (rows=2 width=189) Output:["_col0","_col2","_col3"] - Filter Operator [FIL_15] (rows=5 width=20) + Filter Operator [FIL_15] (rows=2 width=189) predicate:((val = 't1val01') and dimid is not null) - TableScan [TS_0] (rows=10 width=20) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"] + TableScan [TS_0] (rows=10 width=189) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1","dimid"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Group By Operator [GBY_7] (rows=5 width=3) + Group By Operator [GBY_7] (rows=2 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_5] (rows=5 width=3) + Select Operator [SEL_5] (rows=5 width=4) Output:["_col0"] - Filter Operator [FIL_16] (rows=5 width=3) + Filter Operator [FIL_16] (rows=5 width=4) predicate:id is not null - TableScan [TS_3] (rows=5 width=3) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + TableScan [TS_3] (rows=5 width=4) + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id where table1.val = 't1val01' PREHOOK: type: QUERY @@ -116,44 +116,44 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_18] - Select Operator [SEL_17] (rows=5 width=24) + Select Operator [SEL_17] (rows=1 width=187) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_28] (rows=5 width=24) + Merge Join Operator [MERGEJOIN_28] (rows=1 width=96) Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col0","_col4"] <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0 - Select Operator [SEL_8] (rows=3 width=10) + Select Operator [SEL_8] (rows=3 width=96) Output:["_col0","_col1"] - Filter Operator [FIL_26] (rows=3 width=10) + Filter Operator [FIL_26] (rows=3 width=96) predicate:id is not null - TableScan [TS_6] (rows=3 width=10) - default@table2,table2,Tbl:COMPLETE,Col:NONE,Output:["id","val2"] + TableScan [TS_6] (rows=3 width=96) + default@table2,table2,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val2"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_27] (rows=5 width=22) + Merge Join Operator [MERGEJOIN_27] (rows=1 width=4) Conds:RS_11._col2=RS_12._col0(Left Semi),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_11] PartitionCols:_col2 - Select Operator [SEL_2] (rows=5 width=20) + Select Operator [SEL_2] (rows=2 width=99) Output:["_col0","_col2"] - Filter Operator [FIL_24] (rows=5 width=20) + Filter Operator [FIL_24] (rows=2 width=99) predicate:((val = 't1val01') and id is not null and dimid is not null) - TableScan [TS_0] (rows=10 width=20) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","dimid"] + TableScan [TS_0] (rows=10 width=99) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","dimid"] <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_12] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=5 width=3) + Group By Operator [GBY_10] (rows=2 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_5] (rows=5 width=3) + Select Operator [SEL_5] (rows=5 width=4) Output:["_col0"] - Filter Operator [FIL_25] (rows=5 width=3) + Filter Operator [FIL_25] (rows=5 width=4) predicate:id is not null - TableScan [TS_3] (rows=5 width=3) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + TableScan [TS_3] (rows=5 width=4) + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table2.val2 from table1 inner join table2 on table1.val = 't1val01' and table1.id = table2.id left semi join table3 on table1.dimid = table3.id PREHOOK: type: QUERY @@ -186,44 +186,44 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_18] - Select Operator [SEL_17] (rows=5 width=24) + Select Operator [SEL_17] (rows=1 width=187) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_28] (rows=5 width=24) + Merge Join Operator [MERGEJOIN_28] (rows=1 width=96) Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col0","_col4"] <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0 - Select Operator [SEL_8] (rows=3 width=10) + Select Operator [SEL_8] (rows=3 width=96) Output:["_col0","_col1"] - Filter Operator [FIL_26] (rows=3 width=10) + Filter Operator [FIL_26] (rows=3 width=96) predicate:id is not null - TableScan [TS_6] (rows=3 width=10) - default@table2,table2,Tbl:COMPLETE,Col:NONE,Output:["id","val2"] + TableScan [TS_6] (rows=3 width=96) + default@table2,table2,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val2"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_27] (rows=5 width=22) + Merge Join Operator [MERGEJOIN_27] (rows=1 width=4) Conds:RS_11._col2=RS_12._col0(Left Semi),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_11] PartitionCols:_col2 - Select Operator [SEL_2] (rows=5 width=20) + Select Operator [SEL_2] (rows=2 width=99) Output:["_col0","_col2"] - Filter Operator [FIL_24] (rows=5 width=20) + Filter Operator [FIL_24] (rows=2 width=99) predicate:((val = 't1val01') and dimid is not null and id is not null) - TableScan [TS_0] (rows=10 width=20) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","dimid"] + TableScan [TS_0] (rows=10 width=99) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","dimid"] <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_12] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=5 width=3) + Group By Operator [GBY_10] (rows=2 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_5] (rows=5 width=3) + Select Operator [SEL_5] (rows=5 width=4) Output:["_col0"] - Filter Operator [FIL_25] (rows=5 width=3) + Filter Operator [FIL_25] (rows=5 width=4) predicate:id is not null - TableScan [TS_3] (rows=5 width=3) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + TableScan [TS_3] (rows=5 width=4) + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table2.val2 from table1 left semi join table3 on table1.dimid = table3.id inner join table2 on table1.val = 't1val01' and table1.id = table2.id PREHOOK: type: QUERY @@ -255,28 +255,28 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=2 width=3) + Merge Join Operator [MERGEJOIN_17] (rows=1 width=185) Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:100, true - Select Operator [SEL_2] (rows=1 width=20) + Select Operator [SEL_2] (rows=1 width=193) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_15] (rows=1 width=20) + Filter Operator [FIL_15] (rows=1 width=185) predicate:false - TableScan [TS_0] (rows=10 width=20) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1"] + TableScan [TS_0] (rows=10 width=185) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=2 width=3) + Group By Operator [GBY_7] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=2 width=3) + Select Operator [SEL_5] (rows=1 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=2 width=3) + Filter Operator [FIL_16] (rows=1 width=4) predicate:(id = 100) - TableScan [TS_3] (rows=5 width=3) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + TableScan [TS_3] (rows=5 width=4) + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 where table1.dimid <> 100 PREHOOK: type: QUERY @@ -303,28 +303,28 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=5 width=22) + Merge Join Operator [MERGEJOIN_17] (rows=3 width=185) Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:100, true - Select Operator [SEL_2] (rows=5 width=20) + Select Operator [SEL_2] (rows=3 width=193) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_15] (rows=5 width=20) + Filter Operator [FIL_15] (rows=3 width=189) predicate:(dimid = 100) - TableScan [TS_0] (rows=10 width=20) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"] + TableScan [TS_0] (rows=10 width=189) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1","dimid"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=2 width=3) + Group By Operator [GBY_7] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=2 width=3) + Select Operator [SEL_5] (rows=1 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=2 width=3) + Filter Operator [FIL_16] (rows=1 width=4) predicate:(id = 100) - TableScan [TS_3] (rows=5 width=3) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + TableScan [TS_3] (rows=5 width=4) + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 where table1.dimid IN (100,200) PREHOOK: type: QUERY @@ -353,28 +353,28 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=2 width=3) + Merge Join Operator [MERGEJOIN_17] (rows=1 width=185) Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:100, true - Select Operator [SEL_2] (rows=1 width=20) + Select Operator [SEL_2] (rows=1 width=193) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_15] (rows=1 width=20) + Filter Operator [FIL_15] (rows=1 width=185) predicate:false - TableScan [TS_0] (rows=10 width=20) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1"] + TableScan [TS_0] (rows=10 width=185) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=2 width=3) + Group By Operator [GBY_7] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=2 width=3) + Select Operator [SEL_5] (rows=1 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=2 width=3) + Filter Operator [FIL_16] (rows=1 width=4) predicate:(id = 100) - TableScan [TS_3] (rows=5 width=3) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + TableScan [TS_3] (rows=5 width=4) + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 where table1.dimid = 200 PREHOOK: type: QUERY @@ -401,28 +401,28 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=5 width=22) + Merge Join Operator [MERGEJOIN_17] (rows=3 width=185) Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:100, true - Select Operator [SEL_2] (rows=5 width=20) + Select Operator [SEL_2] (rows=3 width=193) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_15] (rows=5 width=20) + Filter Operator [FIL_15] (rows=3 width=189) predicate:(dimid = 100) - TableScan [TS_0] (rows=10 width=20) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"] + TableScan [TS_0] (rows=10 width=189) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1","dimid"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=2 width=3) + Group By Operator [GBY_7] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=2 width=3) + Select Operator [SEL_5] (rows=1 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=2 width=3) + Filter Operator [FIL_16] (rows=1 width=4) predicate:(id = 100) - TableScan [TS_3] (rows=5 width=3) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + TableScan [TS_3] (rows=5 width=4) + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 where table1.dimid = 100 PREHOOK: type: QUERY @@ -451,28 +451,28 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=5 width=22) + Merge Join Operator [MERGEJOIN_17] (rows=3 width=185) Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:100, true - Select Operator [SEL_2] (rows=5 width=20) + Select Operator [SEL_2] (rows=3 width=193) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_15] (rows=5 width=20) + Filter Operator [FIL_15] (rows=3 width=189) predicate:(dimid = 100) - TableScan [TS_0] (rows=10 width=20) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"] + TableScan [TS_0] (rows=10 width=189) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1","dimid"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=2 width=3) + Group By Operator [GBY_7] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=2 width=3) + Select Operator [SEL_5] (rows=1 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=2 width=3) + Filter Operator [FIL_16] (rows=1 width=4) predicate:(id = 100) - TableScan [TS_3] (rows=5 width=3) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + TableScan [TS_3] (rows=5 width=4) + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/cte_5.q.out b/ql/src/test/results/clientpositive/llap/cte_5.q.out index 58a963b8a2..f3485da879 100644 --- a/ql/src/test/results/clientpositive/llap/cte_5.q.out +++ b/ql/src/test/results/clientpositive/llap/cte_5.q.out @@ -85,17 +85,17 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=2 width=89) + Select Operator [SEL_9] (rows=2 width=4) Output:["_col0"] - Merge Join Operator [MERGEJOIN_13] (rows=2 width=89) + Merge Join Operator [MERGEJOIN_13] (rows=2 width=8) Conds:(Inner) <-Map 1 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_6] - Select Operator [SEL_2] (rows=1 width=3) - Filter Operator [FIL_11] (rows=1 width=3) + Select Operator [SEL_2] (rows=1 width=4) + Filter Operator [FIL_11] (rows=1 width=4) predicate:(UDFToDouble(colnum) = 5.0) - TableScan [TS_0] (rows=1 width=3) - mydb@q1,a,Tbl:COMPLETE,Col:NONE,Output:["colnum"] + TableScan [TS_0] (rows=1 width=4) + mydb@q1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["colnum"] <-Map 3 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_7] Select Operator [SEL_5] (rows=2 width=85) diff --git a/ql/src/test/results/clientpositive/llap/cte_mat_5.q.out b/ql/src/test/results/clientpositive/llap/cte_mat_5.q.out index 6fa26157a7..41812e5a65 100644 --- a/ql/src/test/results/clientpositive/llap/cte_mat_5.q.out +++ b/ql/src/test/results/clientpositive/llap/cte_mat_5.q.out @@ -84,17 +84,17 @@ Stage-3 Stage-4 Reducer 3 llap File Output Operator [FS_15] - Merge Join Operator [MERGEJOIN_20] (rows=1 width=3) + Merge Join Operator [MERGEJOIN_20] (rows=1 width=4) Conds:RS_11.UDFToDouble(_col0)=RS_12.UDFToDouble(_col0)(Inner),Output:["_col0"] <-Map 2 [SIMPLE_EDGE] llap SHUFFLE [RS_11] PartitionCols:UDFToDouble(_col0) - Select Operator [SEL_7] (rows=1 width=3) + Select Operator [SEL_7] (rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_18] (rows=1 width=3) + Filter Operator [FIL_18] (rows=1 width=4) predicate:colnum is not null - TableScan [TS_5] (rows=1 width=3) - mydb@q1,a,Tbl:COMPLETE,Col:NONE,Output:["colnum"] + TableScan [TS_5] (rows=1 width=4) + mydb@q1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["colnum"] <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_12] PartitionCols:UDFToDouble(_col0) diff --git a/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out b/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out index 4a2294f71e..a737ef04b3 100644 --- a/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out +++ b/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -140,6 +141,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -177,6 +213,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket2_1 select * from src PREHOOK: type: QUERY @@ -211,18 +255,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -232,10 +276,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out index 78c907084a..651215eddd 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out @@ -29,6 +29,8 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9 Stage-2 depends on stages: Stage-5 Stage-7 depends on stages: Stage-2 Stage-3 depends on stages: Stage-5 @@ -41,13 +43,14 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 8 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Map 1 <- Reducer 9 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Reducer 2 (SIMPLE_EDGE) Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +69,7 @@ STAGE PLANS: value expressions: ROW__ID (type: struct) Execution mode: llap LLAP IO: may be used (ACID table) - Map 7 + Map 8 Map Operator Tree: TableScan alias: s @@ -215,6 +218,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -231,7 +254,35 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT - Reducer 8 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -260,6 +311,20 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table + Stage: Stage-2 Move Operator tables: @@ -310,16 +375,18 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -337,7 +404,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: s @@ -402,7 +469,35 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT - Reducer 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -431,6 +526,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl + PREHOOK: query: explain merge into acidTbl as t using ( select * from nonAcidOrcTbl where a > 0 union all @@ -454,6 +556,8 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9 Stage-2 depends on stages: Stage-5 Stage-7 depends on stages: Stage-2 Stage-3 depends on stages: Stage-5 @@ -467,14 +571,15 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 10 <- Reducer 8 (BROADCAST_EDGE) - Map 9 <- Union 2 (CONTAINS) - Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Map 10 <- Union 2 (CONTAINS) + Map 11 <- Reducer 9 (BROADCAST_EDGE) + Reducer 3 <- Map 11 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Reducer 3 (SIMPLE_EDGE) Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -561,6 +666,22 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs + Map 11 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (a BETWEEN DynamicValue(RS_10_nonacidorctbl_a_min) AND DynamicValue(RS_10_nonacidorctbl_a_max) and in_bloom_filter(a, DynamicValue(RS_10_nonacidorctbl_a_bloom_filter))) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: ROW__ID (type: struct) + Execution mode: llap + LLAP IO: may be used (ACID table) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -684,6 +805,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -700,10 +841,38 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=4) mode: final outputColumnNames: _col0, _col1, _col2 @@ -731,6 +900,20 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table + Stage: Stage-2 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out index 453711c57c..76ae92b5d4 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out @@ -160,6 +160,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -173,19 +174,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), _col0 (type: smallint) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -195,11 +196,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -225,6 +226,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_orc + PREHOOK: query: explain insert overwrite table over1k_part_limit_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part_limit_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 limit 10 @@ -234,6 +242,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -248,20 +257,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: vectorized, llap @@ -272,15 +281,15 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap @@ -288,11 +297,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -318,6 +327,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit_orc + PREHOOK: query: explain insert overwrite table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 @@ -327,6 +343,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -340,19 +357,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -362,11 +379,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -391,6 +408,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_orc + PREHOOK: query: explain insert overwrite table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 @@ -400,6 +424,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -413,19 +438,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float) sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -435,11 +460,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -464,6 +489,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort_orc + PREHOOK: query: insert overwrite table over1k_part_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by si PREHOOK: type: QUERY PREHOOK: Input: default@over1k_orc @@ -541,6 +573,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -554,19 +587,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), _col0 (type: smallint) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -576,11 +609,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -606,6 +639,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_orc + PREHOOK: query: explain insert into table over1k_part_limit_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain insert into table over1k_part_limit_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 limit 10 @@ -615,6 +655,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -629,20 +670,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: vectorized, llap @@ -653,15 +694,15 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap @@ -669,11 +710,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -699,6 +740,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit_orc + PREHOOK: query: explain insert into table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert into table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 @@ -708,6 +756,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -721,19 +770,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -743,11 +792,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -772,6 +821,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_orc + PREHOOK: query: explain insert into table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert into table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 @@ -781,6 +837,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -794,19 +851,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float) sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -816,11 +873,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -845,6 +902,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort_orc + PREHOOK: query: insert into table over1k_part_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by si PREHOOK: type: QUERY PREHOOK: Input: default@over1k_orc @@ -1324,6 +1388,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1331,42 +1396,79 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2_orc + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 18 Data size: 1998 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 3990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 2 Data size: 3990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1387,6 +1489,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc + PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i @@ -1396,6 +1505,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1409,19 +1519,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1431,11 +1541,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), KEY._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1461,6 +1571,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc + PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from (select * from over1k_orc order by i limit 10) tmp where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from (select * from over1k_orc order by i limit 10) tmp where t is null or t=27 @@ -1470,6 +1587,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1484,15 +1602,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float) Execution mode: vectorized, llap @@ -1503,22 +1621,22 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col0 is null or (_col0 = 27)) (type: boolean) - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap @@ -1526,11 +1644,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1556,6 +1674,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc + PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t @@ -1565,6 +1690,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1572,48 +1698,85 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float) sort order: +++++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint), KEY._col1 (type: smallint), KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: float) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2_orc + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 9 Data size: 999 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 3990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 2 Data size: 3990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1634,6 +1797,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc + PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t @@ -1643,6 +1813,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1656,20 +1827,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float) sort order: +++++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1679,15 +1850,15 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint), KEY._col1 (type: smallint), KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: float) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1713,6 +1884,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc + PREHOOK: query: insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i PREHOOK: type: QUERY PREHOOK: Input: default@over1k_orc @@ -1755,7 +1933,7 @@ Database: default Table: over1k_part2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -1797,7 +1975,7 @@ Database: default Table: over1k_part2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -2034,6 +2212,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2041,43 +2220,80 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: float) sort order: + Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: float), VALUE._col3 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part_buck_sort2_orc + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 2 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 3848 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 3848 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 3848 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2097,6 +2313,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2_orc + PREHOOK: query: explain insert overwrite table over1k_part_buck_sort2_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part_buck_sort2_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 @@ -2106,6 +2329,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2119,19 +2343,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float) sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2141,11 +2365,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2170,6 +2394,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2_orc + PREHOOK: query: insert overwrite table over1k_part_buck_sort2_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY PREHOOK: Input: default@over1k_orc @@ -2211,7 +2442,7 @@ Database: default Table: over1k_part_buck_sort2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -2252,7 +2483,7 @@ Database: default Table: over1k_part_buck_sort2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out index 1dc9ed5a8c..3229b18c4a 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out @@ -117,6 +117,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -182,6 +183,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part + PREHOOK: query: explain insert overwrite table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 limit 10 @@ -191,6 +199,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -275,6 +284,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit + PREHOOK: query: explain insert overwrite table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27 @@ -284,6 +300,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -348,6 +365,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck + PREHOOK: query: explain insert overwrite table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27 @@ -357,6 +381,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -421,6 +446,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort + PREHOOK: query: insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY PREHOOK: Input: default@over1k @@ -498,6 +530,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -563,6 +596,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part + PREHOOK: query: explain insert into table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain insert into table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 limit 10 @@ -572,6 +612,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -656,6 +697,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit + PREHOOK: query: explain insert into table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert into table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27 @@ -665,6 +713,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -729,6 +778,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck + PREHOOK: query: explain insert into table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert into table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27 @@ -738,6 +794,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -802,6 +859,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort + PREHOOK: query: insert into table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY PREHOOK: Input: default@over1k @@ -1281,6 +1345,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1288,6 +1353,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1324,6 +1390,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1344,6 +1446,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 + PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i @@ -1353,6 +1462,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1418,6 +1528,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 + PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from (select * from over1k order by i limit 10) tmp where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from (select * from over1k order by i limit 10) tmp where t is null or t=27 @@ -1427,6 +1544,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1513,6 +1631,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 + PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t @@ -1522,6 +1647,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1529,6 +1655,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1571,6 +1698,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1110 Data size: 26640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1110 Data size: 26640 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1110 Data size: 26640 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1591,6 +1754,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 + PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t @@ -1600,6 +1770,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1670,6 +1841,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 + PREHOOK: query: insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i PREHOOK: type: QUERY PREHOOK: Input: default@over1k @@ -1712,7 +1890,7 @@ Database: default Table: over1k_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -1754,7 +1932,7 @@ Database: default Table: over1k_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -1991,6 +2169,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1998,6 +2177,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2035,6 +2215,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part_buck_sort2 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2054,6 +2270,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2 + PREHOOK: query: explain insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k where t is null or t=27 @@ -2063,6 +2286,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2127,6 +2351,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2 + PREHOOK: query: insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY PREHOOK: Input: default@over1k @@ -2168,7 +2399,7 @@ Database: default Table: over1k_part_buck_sort2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -2209,7 +2440,7 @@ Database: default Table: over1k_part_buck_sort2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -2438,6 +2669,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2504,6 +2736,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where t=27 @@ -2513,6 +2752,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2579,6 +2819,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 @@ -2588,6 +2835,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2654,6 +2902,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 @@ -2663,6 +2918,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2729,6 +2985,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and s="foo" PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and s="foo" @@ -2738,6 +3001,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2804,6 +3068,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where t=27 and s="foo" PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where t=27 and s="foo" @@ -2813,6 +3084,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2879,6 +3151,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 and s="foo" PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 and s="foo" @@ -2888,11 +3167,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -2914,8 +3197,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part3 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 107 Data size: 13282 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 107 Data size: 13282 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 107 Data size: 13282 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 53 Data size: 6578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 53 Data size: 6578 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 53 Data size: 6578 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2937,6 +3256,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where s="foo" PREHOOK: type: QUERY PREHOOK: Input: default@over1k diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out index d4811d64d7..1bafedcd9f 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out @@ -77,6 +77,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -146,6 +147,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part + PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) select ss_net_paid_inc_tax, ss_net_profit, @@ -320,6 +328,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -384,6 +393,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part + PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) select ss_net_paid_inc_tax, ss_net_profit, @@ -558,6 +574,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -607,6 +624,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ss_part + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 16), compute_stats(ss_net_profit, 16) + keys: ss_sold_date_sk (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -626,6 +664,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part + PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) select ss_net_paid_inc_tax, ss_net_profit, @@ -679,7 +724,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 151 @@ -739,7 +784,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 186 @@ -800,13 +845,14 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -822,7 +868,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col2 (type: int) + sort order: + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) @@ -843,6 +890,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ss_part + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 16), compute_stats(ss_net_profit, 16) + keys: ss_sold_date_sk (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -862,6 +930,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part + PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) select ss_net_paid_inc_tax, ss_net_profit, @@ -909,7 +984,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 151 @@ -969,7 +1044,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 186 @@ -1091,6 +1166,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1104,42 +1180,63 @@ STAGE PLANS: Map Operator Tree: TableScan alias: ss_orc - Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.ss_part_orc + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 16), compute_stats(ss_net_profit, 16) + keys: ss_sold_date_sk (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 956 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 956 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 956 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1159,6 +1256,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part_orc + PREHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) select ss_net_paid_inc_tax, ss_net_profit, @@ -1212,7 +1316,7 @@ Database: default Table: ss_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 88 @@ -1272,7 +1376,7 @@ Database: default Table: ss_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 104 @@ -1333,49 +1437,72 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: ss_orc - Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_net_paid_inc_tax (type: float), ss_net_profit (type: float), ss_sold_date_sk (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col2 (type: int) + sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.ss_part_orc + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 16), compute_stats(ss_net_profit, 16) + keys: ss_sold_date_sk (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 956 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 956 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 956 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1395,6 +1522,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part_orc + PREHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) select ss_net_paid_inc_tax, ss_net_profit, @@ -1442,7 +1576,7 @@ Database: default Table: ss_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 88 @@ -1502,7 +1636,7 @@ Database: default Table: ss_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 104 @@ -1601,6 +1735,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1608,6 +1743,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1634,7 +1770,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1654,6 +1790,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.hive13_dp1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: k1, k2, day + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + keys: day (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1039 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1039 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1047 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1047 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1047 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1673,6 +1845,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.hive13_dp1 + PREHOOK: query: insert overwrite table `hive13_dp1` partition(`day`) select key k1, @@ -1731,6 +1910,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1738,6 +1918,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1764,7 +1945,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1784,6 +1965,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.hive13_dp1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: k1, k2, day + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + keys: day (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1039 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1039 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1047 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1047 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1047 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1803,6 +2020,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.hive13_dp1 + PREHOOK: query: insert overwrite table `hive13_dp1` partition(`day`) select key k1, diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out index 788854aa04..7dbd639b15 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out @@ -92,19 +92,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_part - Statistics: Num rows: 1600 Data size: 30800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 139200 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 696 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2752 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2752 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -113,10 +113,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2752 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2752 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -185,19 +185,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_part - Statistics: Num rows: 1600 Data size: 312400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 433600 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 800 Data size: 147200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 2168 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), ds (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 3472 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 3472 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col3 (type: string) Execution mode: llap LLAP IO: may be used (ACID table) @@ -207,10 +207,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 3472 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 3472 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -372,19 +372,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_part_sdpo - Statistics: Num rows: 1600 Data size: 30800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 139200 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 139200 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 550400 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 550400 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -393,10 +393,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 550400 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 550400 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -465,19 +465,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_part_sdpo - Statistics: Num rows: 1600 Data size: 312400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 433600 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 800 Data size: 147200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 433600 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), ds (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 694400 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col3 (type: string), '_bucket_number' (type: string), _col0 (type: struct) sort order: +++ Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 694400 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -486,11 +486,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: struct), 'foo' (type: string), 'bar' (type: string), KEY._col3 (type: string), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, '_bucket_number' - Statistics: Num rows: 800 Data size: 286400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 572800 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 800 Data size: 286400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 572800 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -661,19 +661,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part - Statistics: Num rows: 1600 Data size: 30800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 139200 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 696 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2784 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2784 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -682,10 +682,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2784 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2784 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -755,19 +755,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part - Statistics: Num rows: 3200 Data size: 48800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 291200 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 1600 Data size: 6400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 16 Data size: 1456 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), hr (type: int) outputColumnNames: _col0, _col4 - Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 16 Data size: 5568 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 16 Data size: 5568 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col4 (type: int) Execution mode: llap LLAP IO: may be used (ACID table) @@ -777,10 +777,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 16 Data size: 5568 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 16 Data size: 5568 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -879,19 +879,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part - Statistics: Num rows: 3200 Data size: 637600 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 892800 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (value = 'bar') (type: boolean) - Statistics: Num rows: 1600 Data size: 300800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 15 Data size: 4185 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), ds (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 15 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 15 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), _col2 (type: int) Execution mode: llap LLAP IO: may be used (ACID table) @@ -901,10 +901,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 15 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 15 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1073,19 +1073,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part_sdpo - Statistics: Num rows: 1600 Data size: 30800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 139200 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 139200 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -1094,10 +1094,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1167,19 +1167,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part_sdpo - Statistics: Num rows: 3200 Data size: 48800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 291200 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 1600 Data size: 6400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 291200 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), hr (type: int) outputColumnNames: _col0, _col4 - Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 1113600 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: '2008-04-08' (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct) sort order: ++++ Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int) - Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 1113600 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -1188,11 +1188,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 1600 Data size: 435200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 870400 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 1600 Data size: 435200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 870400 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1291,19 +1291,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part_sdpo - Statistics: Num rows: 3200 Data size: 637600 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 880000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (value = 'bar') (type: boolean) - Statistics: Num rows: 1600 Data size: 300800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 880000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), ds (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 844800 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col1 (type: string), _col2 (type: int), '_bucket_number' (type: string), _col0 (type: struct) sort order: ++++ Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 844800 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -1312,11 +1312,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: struct), KEY._col1 (type: string), KEY._col2 (type: int), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, '_bucket_number' - Statistics: Num rows: 1600 Data size: 579200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 1158400 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 1600 Data size: 579200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 1158400 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1485,19 +1485,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part_sdpo_no_cp - Statistics: Num rows: 1600 Data size: 318800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 440000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 800 Data size: 150400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 440000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), key (type: string), ds (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 800 Data size: 280800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 700800 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct) sort order: ++++ Map-reduce partition columns: _col3 (type: string), _col4 (type: int) - Statistics: Num rows: 800 Data size: 280800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 700800 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), 'bar' (type: string) Execution mode: llap LLAP IO: may be used (ACID table) @@ -1507,11 +1507,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: struct), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 800 Data size: 359200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 857600 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 800 Data size: 359200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 857600 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1581,19 +1581,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_2l_part_sdpo_no_cp - Statistics: Num rows: 3200 Data size: 637600 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 880000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 1600 Data size: 300800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 880000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), key (type: string), ds (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 1600 Data size: 561600 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 1401600 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct) sort order: ++++ Map-reduce partition columns: _col3 (type: string), _col4 (type: int) - Statistics: Num rows: 1600 Data size: 561600 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 1401600 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), 'bar' (type: string) Execution mode: llap LLAP IO: may be used (ACID table) @@ -1603,11 +1603,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: struct), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 1600 Data size: 718400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 1715200 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 1600 Data size: 718400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3200 Data size: 1715200 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/except_distinct.q.out b/ql/src/test/results/clientpositive/llap/except_distinct.q.out index e4c2941f67..aa4975e76c 100644 --- a/ql/src/test/results/clientpositive/llap/except_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/except_distinct.q.out @@ -721,42 +721,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: int) outputColumnNames: value - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: value (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: key - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -766,32 +766,32 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(2) keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), 2 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), sum(_col2) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -801,17 +801,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col1 > 0) and ((_col1 * 2) = _col2)) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -823,32 +823,32 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), 1 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), sum(_col2) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 8b04bc9261..d2a09b8f66 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -45,21 +45,39 @@ POSTHOOK: query: explain insert overwrite table src_orc_merge_test_part partitio POSTHOOK: type: QUERY Plan optimized by CBO. -Stage-3 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.src_orc_merge_test_part"} - Stage-2 - Dependency Collection{} - Stage-1 - Map 1 llap - File Output Operator [FS_3] - table:{"name:":"default.src_orc_merge_test_part"} - Select Operator [SEL_1] (rows=500 width=95) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-4 + Column Stats Work{} + Stage-3 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.src_orc_merge_test_part"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 2 llap + File Output Operator [FS_6] + Select Operator [SEL_5] (rows=1 width=1157) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_4] (rows=1 width=1157) + Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"],keys:KEY._col0, KEY._col1 + <-Map 1 [SIMPLE_EDGE] llap + File Output Operator [FS_3] + table:{"name:":"default.src_orc_merge_test_part"} + Select Operator [SEL_1] (rows=500 width=95) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + SHUFFLE [RS_3] + PartitionCols:_col0, _col1 + Group By Operator [GBY_2] (rows=1 width=1165) + Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"],keys:ds, ts + Select Operator [SEL_1] (rows=500 width=292) + Output:["key","value","ds","ts"] + Please refer to the previous Select Operator [SEL_1] PREHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src PREHOOK: type: QUERY @@ -79,32 +97,48 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Stage-3 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.src_orc_merge_test_part"} - Stage-2 - Dependency Collection{} - Stage-1 - Reducer 2 llap - File Output Operator [FS_7] - table:{"name:":"default.src_orc_merge_test_part"} - Select Operator [SEL_6] (rows=100 width=95) - Output:["_col0","_col1"] - Limit [LIM_5] (rows=100 width=178) - Number of rows:100 - Select Operator [SEL_4] (rows=100 width=178) - Output:["_col0","_col1"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_3] - Limit [LIM_2] (rows=100 width=178) - Number of rows:100 - Select Operator [SEL_1] (rows=500 width=178) +Stage-4 + Column Stats Work{} + Stage-3 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.src_orc_merge_test_part"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 3 llap + File Output Operator [FS_6] + Select Operator [SEL_5] (rows=1 width=1157) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_4] (rows=1 width=1157) + Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] llap + File Output Operator [FS_7] + table:{"name:":"default.src_orc_merge_test_part"} + Select Operator [SEL_6] (rows=100 width=95) Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Limit [LIM_5] (rows=100 width=178) + Number of rows:100 + Select Operator [SEL_4] (rows=100 width=178) + Output:["_col0","_col1"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Limit [LIM_2] (rows=100 width=178) + Number of rows:100 + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + SHUFFLE [RS_3] + PartitionCols:_col0, _col1 + Group By Operator [GBY_2] (rows=1 width=1165) + Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"],keys:ds, ts + Select Operator [SEL_1] (rows=100 width=292) + Output:["key","value","ds","ts"] + Please refer to the previous Select Operator [SEL_6] PREHOOK: query: explain select count(1) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31' PREHOOK: type: QUERY @@ -137,10 +171,10 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_5] Group By Operator [GBY_4] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(_col0)","sum(_col1)"] - Select Operator [SEL_2] (rows=500 width=102) + Select Operator [SEL_2] (rows=500 width=95) Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=102) - default@src_orc_merge_test_part,src_orc_merge_test_part,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=500 width=95) + default@src_orc_merge_test_part,src_orc_merge_test_part,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: alter table src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') concatenate PREHOOK: type: ALTER_PARTITION_MERGE @@ -196,10 +230,10 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_5] Group By Operator [GBY_4] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(_col0)","sum(_col1)"] - Select Operator [SEL_2] (rows=500 width=102) + Select Operator [SEL_2] (rows=500 width=95) Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=102) - default@src_orc_merge_test_part,src_orc_merge_test_part,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=500 width=95) + default@src_orc_merge_test_part,src_orc_merge_test_part,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: drop table src_orc_merge_test_part PREHOOK: type: DROPTABLE @@ -3100,10 +3134,10 @@ Stage-0 Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_3] - Select Operator [SEL_1] (rows=5 width=6) + Select Operator [SEL_1] (rows=5 width=93) Output:["_col0","_col1"] - TableScan [TS_0] (rows=5 width=6) - default@tgt_rc_merge_test,tgt_rc_merge_test,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=5 width=93) + default@tgt_rc_merge_test,tgt_rc_merge_test,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: alter table tgt_rc_merge_test concatenate PREHOOK: type: ALTER_TABLE_MERGE @@ -3172,10 +3206,10 @@ Stage-0 Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_3] - Select Operator [SEL_1] (rows=5 width=6) + Select Operator [SEL_1] (rows=5 width=93) Output:["_col0","_col1"] - TableScan [TS_0] (rows=5 width=6) - default@tgt_rc_merge_test,tgt_rc_merge_test,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=5 width=93) + default@tgt_rc_merge_test,tgt_rc_merge_test,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: drop table src_rc_merge_test PREHOOK: type: DROPTABLE @@ -3819,20 +3853,20 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=28 width=7) + Select Operator [SEL_5] (rows=56 width=16) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_7] (rows=28 width=7) + Merge Join Operator [MERGEJOIN_7] (rows=56 width=16) Conds:RS_2.key=RS_3.key(Inner),Output:["_col0","_col1","_col5","_col6"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_2] PartitionCols:key TableScan [TS_0] (rows=26 width=7) - default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_3] PartitionCols:key TableScan [TS_1] (rows=26 width=7) - default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key AND a.value <=> b.value PREHOOK: type: QUERY @@ -3849,20 +3883,20 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=28 width=7) + Select Operator [SEL_5] (rows=4 width=16) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_7] (rows=28 width=7) + Merge Join Operator [MERGEJOIN_7] (rows=4 width=16) Conds:RS_2.key, value=RS_3.key, value(Inner),Output:["_col0","_col1","_col5","_col6"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_2] PartitionCols:key, value TableScan [TS_0] (rows=26 width=7) - default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_3] PartitionCols:key, value TableScan [TS_1] (rows=26 width=7) - default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key <=> b.key PREHOOK: type: QUERY @@ -3879,20 +3913,20 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=28 width=7) + Select Operator [SEL_5] (rows=56 width=16) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_7] (rows=28 width=7) + Merge Join Operator [MERGEJOIN_7] (rows=56 width=16) Conds:RS_2.key=RS_3.key(Right Outer),Output:["_col0","_col1","_col5","_col6"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_2] PartitionCols:key TableScan [TS_0] (rows=26 width=7) - default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_3] PartitionCols:key TableScan [TS_1] (rows=26 width=7) - default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key PREHOOK: type: QUERY @@ -3909,20 +3943,20 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=28 width=7) + Select Operator [SEL_5] (rows=56 width=16) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_7] (rows=28 width=7) + Merge Join Operator [MERGEJOIN_7] (rows=56 width=16) Conds:RS_2.key=RS_3.key(Inner),Output:["_col0","_col1","_col5","_col6"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_2] PartitionCols:key TableScan [TS_0] (rows=26 width=7) - default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_3] PartitionCols:key TableScan [TS_1] (rows=26 width=7) - default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key <=> b.key PREHOOK: type: QUERY @@ -3939,20 +3973,20 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=28 width=7) + Select Operator [SEL_5] (rows=56 width=16) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_7] (rows=28 width=7) + Merge Join Operator [MERGEJOIN_7] (rows=56 width=16) Conds:RS_2.key=RS_3.key(Left Outer),Output:["_col0","_col1","_col5","_col6"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_2] PartitionCols:key TableScan [TS_0] (rows=26 width=7) - default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_3] PartitionCols:key TableScan [TS_1] (rows=26 width=7) - default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: drop table sales PREHOOK: type: DROPTABLE @@ -4900,66 +4934,102 @@ Plan not optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 2 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 2 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.part_4"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 3 llap - File Output Operator [FS_9] - table:{"name:":"default.part_4"} - Select Operator [SEL_7] (rows=26 width=239) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_6] (rows=26 width=499) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}] - Select Operator [SEL_5] (rows=26 width=499) - Output:["_col1","_col2","_col5","_col7"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:_col2 - PTF Operator [PTF_3] (rows=26 width=499) - Function definitions:[{},{"Partition table definition":{"name:":"noop","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}}] - Select Operator [SEL_2] (rows=26 width=499) - Output:["_col1","_col2","_col5","_col7"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_1] - PartitionCols:p_mfgr - TableScan [TS_0] (rows=26 width=231) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_retailprice","p_size"] - Reducer 5 llap - File Output Operator [FS_20] - table:{"name:":"default.part_5"} - Select Operator [SEL_17] (rows=26 width=247) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - PTF Operator [PTF_16] (rows=26 width=499) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col3"}] - Select Operator [SEL_15] (rows=26 width=499) - Output:["_col0","_col2","_col3","_col6"] - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] - PartitionCols:_col2 - Select Operator [SEL_13] (rows=26 width=491) - Output:["sum_window_0","_col1","_col2","_col5"] - PTF Operator [PTF_12] (rows=26 width=491) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] - Select Operator [SEL_11] (rows=26 width=491) - Output:["_col1","_col2","_col5"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col2 - Please refer to the previous PTF Operator [PTF_3] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.part_5"} - Please refer to the previous Stage-3 +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.part_4"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 5 llap + File Output Operator [FS_7] + Group By Operator [GBY_5] (rows=1 width=2880) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)","compute_stats(VALUE._col5)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=2888) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)","compute_stats(VALUE._col4, 16)","compute_stats(VALUE._col5, 16)","compute_stats(VALUE._col6, 16)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_9] + table:{"name:":"default.part_4"} + Select Operator [SEL_7] (rows=26 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + PTF Operator [PTF_6] (rows=26 width=499) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}] + Select Operator [SEL_5] (rows=26 width=499) + Output:["_col1","_col2","_col5","_col7"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:_col2 + PTF Operator [PTF_3] (rows=26 width=499) + Function definitions:[{},{"Partition table definition":{"name:":"noop","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}}] + Select Operator [SEL_2] (rows=26 width=499) + Output:["_col1","_col2","_col5","_col7"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_1] + PartitionCols:p_mfgr + TableScan [TS_0] (rows=26 width=231) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_retailprice","p_size"] + PARTITION_ONLY_SHUFFLE [RS_2] + PartitionCols:rand() + Select Operator [SEL_1] (rows=26 width=239) + Output:["p_mfgr","p_name","p_size","r","dr","s"] + Please refer to the previous Select Operator [SEL_7] + Reducer 9 llap + File Output Operator [FS_7] + Group By Operator [GBY_5] (rows=1 width=3840) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)","compute_stats(VALUE._col5)","compute_stats(VALUE._col6)","compute_stats(VALUE._col7)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=3840) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)","compute_stats(VALUE._col4, 16)","compute_stats(VALUE._col5, 16)","compute_stats(VALUE._col6, 16)","compute_stats(VALUE._col7, 16)","compute_stats(VALUE._col8, 16)"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_20] + table:{"name:":"default.part_5"} + Select Operator [SEL_17] (rows=26 width=247) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + PTF Operator [PTF_16] (rows=26 width=499) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col3"}] + Select Operator [SEL_15] (rows=26 width=499) + Output:["_col0","_col2","_col3","_col6"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col2 + Select Operator [SEL_13] (rows=26 width=491) + Output:["sum_window_0","_col1","_col2","_col5"] + PTF Operator [PTF_12] (rows=26 width=491) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] + Select Operator [SEL_11] (rows=26 width=491) + Output:["_col1","_col2","_col5"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_10] + PartitionCols:_col2 + Please refer to the previous PTF Operator [PTF_3] + PARTITION_ONLY_SHUFFLE [RS_2] + PartitionCols:rand() + Select Operator [SEL_1] (rows=26 width=247) + Output:["p_mfgr","p_name","p_size","s2","r","dr","cud","fv1"] + Please refer to the previous Select Operator [SEL_17] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.part_5"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 PREHOOK: query: explain select p_mfgr, p_name, @@ -5315,41 +5385,58 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) -Stage-3 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest_j1"} - Stage-2 - Dependency Collection{} - Stage-1 - Reducer 2 llap - File Output Operator [FS_11] - table:{"name:":"default.dest_j1"} - Select Operator [SEL_9] (rows=1219 width=95) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_16] (rows=1219 width=178) - Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=500 width=87) - Output:["_col0"] - Filter Operator [FIL_14] (rows=500 width=87) - predicate:key is not null - TableScan [TS_0] (rows=500 width=87) - default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_15] (rows=500 width=178) - predicate:key is not null - TableScan [TS_3] (rows=500 width=178) - default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] +Stage-4 + Column Stats Work{} + Stage-3 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest_j1"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 4 llap + File Output Operator [FS_7] + Group By Operator [GBY_5] (rows=1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=968) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_11] + table:{"name:":"default.dest_j1"} + Select Operator [SEL_9] (rows=1219 width=95) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_16] (rows=1219 width=178) + Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_6] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=500 width=87) + Output:["_col0"] + Filter Operator [FIL_14] (rows=500 width=87) + predicate:key is not null + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_15] (rows=500 width=178) + predicate:key is not null + TableScan [TS_3] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_2] + PartitionCols:rand() + Select Operator [SEL_1] (rows=1219 width=95) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_9] PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 select src1.key, src2.value diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index e3f70b097f..6078de57ce 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -3250,72 +3250,101 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 5 llap - File Output Operator [FS_22] - table:{"name:":"default.dest1"} - Select Operator [SEL_20] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_19] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Group By Operator [GBY_17] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Group By Operator [GBY_14] (rows=1 width=272) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_13] - PartitionCols:_col0, _col1 - Group By Operator [GBY_12] (rows=1 width=272) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_8] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_7] (rows=500 width=10) - Output:["key","value"] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_13] - PartitionCols:_col0, _col1 - Group By Operator [GBY_12] (rows=1 width=272) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_6] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_5] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_4] - Group By Operator [GBY_3] (rows=1 width=8) - Output:["_col0"],aggregations:["count(1)"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - File Output Operator [FS_29] - table:{"name:":"default.dest2"} - Select Operator [SEL_27] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_26] (rows=1 width=464) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 - Please refer to the previous Group By Operator [GBY_14] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest1"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 6 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_22] + table:{"name:":"default.dest1"} + Select Operator [SEL_20] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_19] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_18] + PartitionCols:_col0 + Group By Operator [GBY_17] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Group By Operator [GBY_14] (rows=1 width=272) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_13] + PartitionCols:_col0, _col1 + Group By Operator [GBY_12] (rows=1 width=272) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_8] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_7] (rows=500 width=10) + Output:["key","value"] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_13] + PartitionCols:_col0, _col1 + Group By Operator [GBY_12] (rows=1 width=272) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_6] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_5] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=8) + Output:["_col0"],aggregations:["count(1)"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + File Output Operator [FS_29] + table:{"name:":"default.dest2"} + Select Operator [SEL_27] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_26] (rows=1 width=464) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 + Please refer to the previous Group By Operator [GBY_14] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=1476) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 16)","compute_stats(val1, 16)","compute_stats(val2, 16)"] + Select Operator [SEL_1] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_27] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] + <- Please refer to the previous Reducer 4 [CUSTOM_SIMPLE_EDGE] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.dest2"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 PREHOOK: query: EXPLAIN FROM UNIQUEJOIN PRESERVE src a (a.key), PRESERVE src1 b (b.key), PRESERVE srcpart c (c.key) SELECT a.key, b.key, c.key PREHOOK: type: QUERY @@ -3423,91 +3452,119 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) -Map 7 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) +Map 9 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 4 llap - File Output Operator [FS_20] - table:{"name:":"default.dest1"} - Select Operator [SEL_18] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_17] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_8] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_7] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_23] - PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_8] - <-Map 7 [CONTAINS] llap - Reduce Output Operator [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_12] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_11] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_23] - PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_12] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_6] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_5] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_4] - Group By Operator [GBY_3] (rows=1 width=8) - Output:["_col0"],aggregations:["count(1)"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_23] - PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_6] - Reducer 5 llap - File Output Operator [FS_27] - table:{"name:":"default.dest2"} - Select Operator [SEL_25] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_24] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest1"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 5 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_20] + table:{"name:":"default.dest1"} + Select Operator [SEL_18] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_17] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_16] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_8] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_7] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_8] + <-Map 9 [CONTAINS] llap + Reduce Output Operator [RS_16] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_12] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_11] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_12] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_16] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_6] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_5] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=8) + Output:["_col0"],aggregations:["count(1)"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_6] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_18] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_27] + table:{"name:":"default.dest2"} + Select Operator [SEL_25] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_24] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=1476) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 16)","compute_stats(val1, 16)","compute_stats(val2, 16)"] + Select Operator [SEL_1] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_25] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.dest2"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 PREHOOK: query: explain FROM ( @@ -3532,74 +3589,102 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 4 llap - File Output Operator [FS_17] - table:{"name:":"default.dest1"} - Select Operator [SEL_15] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_14] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_13] - PartitionCols:_col0 - Group By Operator [GBY_12] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_8] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_7] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_20] - PartitionCols:_col0, _col1 - Group By Operator [GBY_19] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_8] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_13] - PartitionCols:_col0 - Group By Operator [GBY_12] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_6] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_5] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_4] - Group By Operator [GBY_3] (rows=1 width=8) - Output:["_col0"],aggregations:["count(1)"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_20] - PartitionCols:_col0, _col1 - Group By Operator [GBY_19] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_6] - Reducer 5 llap - File Output Operator [FS_24] - table:{"name:":"default.dest2"} - Select Operator [SEL_22] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_21] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest1"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 5 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_17] + table:{"name:":"default.dest1"} + Select Operator [SEL_15] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_14] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_13] + PartitionCols:_col0 + Group By Operator [GBY_12] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_8] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_7] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_20] + PartitionCols:_col0, _col1 + Group By Operator [GBY_19] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_8] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_13] + PartitionCols:_col0 + Group By Operator [GBY_12] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_6] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_5] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=8) + Output:["_col0"],aggregations:["count(1)"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_20] + PartitionCols:_col0, _col1 + Group By Operator [GBY_19] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_6] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_15] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_24] + table:{"name:":"default.dest2"} + Select Operator [SEL_22] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_21] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=1476) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 16)","compute_stats(val1, 16)","compute_stats(val2, 16)"] + Select Operator [SEL_1] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_22] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.dest2"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 diff --git a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out index d97223c9d0..43b1880cdd 100644 --- a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out +++ b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out @@ -197,7 +197,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt,zip @@ -332,7 +332,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt,zip @@ -853,7 +853,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -899,7 +899,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -945,7 +945,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -991,7 +991,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1083,7 +1083,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1129,7 +1129,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1175,7 +1175,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1221,7 +1221,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1313,7 +1313,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt diff --git a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out index c5cdb77afc..86309a7f57 100644 --- a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out @@ -45,22 +45,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: f - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1450 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1450 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 auto parallelism: true Execution mode: llap @@ -76,7 +76,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -121,22 +121,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: m - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2289 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: ((value <> '') and key is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 2785 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 2785 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: true @@ -153,7 +153,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -198,22 +198,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: g - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (value <> '') (type: boolean) - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: true Execution mode: llap @@ -229,7 +229,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -281,14 +281,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col3 - Position of Big Table: 0 - Statistics: Num rows: 27 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Position of Big Table: 1 + Statistics: Num rows: 25 Data size: 2325 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 27 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2325 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col0 (type: int) auto parallelism: true @@ -304,17 +304,17 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col5 Position of Big Table: 0 - Statistics: Num rows: 29 Data size: 497 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 4092 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 497 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 4092 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 29 Data size: 497 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 4092 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/groupby2.q.out b/ql/src/test/results/clientpositive/llap/groupby2.q.out index 29b85d1f44..0cee285341 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -27,6 +28,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -81,6 +84,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 205 Data size: 76260 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 205 Data size: 76260 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -98,6 +137,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/groupby3.q.out b/ql/src/test/results/clientpositive/llap/groupby3.q.out index 3495de6d3f..7f63fb9efd 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3.q.out @@ -37,6 +37,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -45,6 +46,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -73,6 +75,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reducer 3 @@ -95,6 +98,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4284 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4284 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -112,6 +143,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out index 083bfc301c..57d5100466 100644 --- a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out @@ -768,14 +768,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -784,15 +784,15 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -800,19 +800,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -822,10 +822,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -889,14 +889,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -905,16 +905,16 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -922,19 +922,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -944,10 +944,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1003,72 +1003,78 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1124,73 +1130,78 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out index 1efb81b35f..35b68914b5 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out @@ -42,7 +42,7 @@ Database: default Table: test_table_bucketed #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 309 rawDataSize 1482 diff --git a/ql/src/test/results/clientpositive/llap/insert1.q.out b/ql/src/test/results/clientpositive/llap/insert1.q.out index aa09585da8..f49cc95bfc 100644 --- a/ql/src/test/results/clientpositive/llap/insert1.q.out +++ b/ql/src/test/results/clientpositive/llap/insert1.q.out @@ -33,11 +33,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -59,8 +63,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -78,6 +110,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + PREHOOK: query: explain insert into table INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) PREHOOK: type: QUERY POSTHOOK: query: explain insert into table INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) @@ -87,11 +126,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -113,8 +156,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -132,6 +203,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + PREHOOK: query: create database x PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:x @@ -155,11 +233,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -181,8 +263,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -200,6 +310,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 + PREHOOK: query: explain insert into table default.INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) PREHOOK: type: QUERY POSTHOOK: query: explain insert into table default.INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) @@ -209,11 +326,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -235,8 +356,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -254,6 +403,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + PREHOOK: query: explain from insert2 insert into table insert1 select * where key < 10 @@ -269,6 +425,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -276,6 +434,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -297,6 +459,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -312,8 +487,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -331,6 +549,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/insert_into1.q.out b/ql/src/test/results/clientpositive/llap/insert_into1.q.out index 5caefe8e43..24c225a9d3 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into1.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into1.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -26,6 +27,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -41,7 +43,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -67,6 +68,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -84,6 +113,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -141,6 +177,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -148,6 +185,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -163,7 +201,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -189,6 +226,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -206,6 +271,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -263,6 +335,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -270,6 +343,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -285,7 +359,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -311,6 +384,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -328,6 +429,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -385,11 +493,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -409,8 +521,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 1 (type: int), 'a' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -428,6 +568,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: insert overwrite table insert_into1 select 1, 'a' PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -447,11 +594,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -471,8 +622,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 2 (type: int), 'b' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -490,6 +669,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: insert into insert_into1 select 2, 'b' PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table diff --git a/ql/src/test/results/clientpositive/llap/insert_into2.q.out b/ql/src/test/results/clientpositive/llap/insert_into2.q.out index a42c651d63..9238082515 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into2.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into2.q.out @@ -23,6 +23,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -30,6 +31,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -45,7 +47,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -71,6 +72,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -90,6 +127,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src order by key limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -186,6 +230,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -193,6 +238,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -208,7 +254,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -234,6 +279,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -253,6 +334,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 100 PREHOOK: type: QUERY @@ -318,6 +406,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -325,6 +414,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -340,7 +430,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -366,6 +455,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 50 Data size: 9000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -385,6 +510,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 50 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/intersect_all.q.out b/ql/src/test/results/clientpositive/llap/intersect_all.q.out index 242209020f..52ea25c9ed 100644 --- a/ql/src/test/results/clientpositive/llap/intersect_all.q.out +++ b/ql/src/test/results/clientpositive/llap/intersect_all.q.out @@ -1551,42 +1551,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: int) outputColumnNames: value - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: value (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: key - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1596,24 +1596,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1), count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -1623,24 +1623,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 2) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE function name: UDTFReplicateRows Select Operator expressions: col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1652,24 +1652,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1), count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out b/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out index 6921d53b81..3b9e5d7880 100644 --- a/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out @@ -1153,42 +1153,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: int) outputColumnNames: value - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: value (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: key - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1198,24 +1198,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -1225,17 +1225,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 = 2) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1247,24 +1247,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/intersect_merge.q.out b/ql/src/test/results/clientpositive/llap/intersect_merge.q.out index a312966720..dfe1cdf068 100644 --- a/ql/src/test/results/clientpositive/llap/intersect_merge.q.out +++ b/ql/src/test/results/clientpositive/llap/intersect_merge.q.out @@ -55,22 +55,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -78,22 +78,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -101,22 +101,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -124,22 +124,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -147,22 +147,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -174,18 +174,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 12 Execution mode: llap @@ -195,18 +195,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -216,18 +216,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -237,17 +237,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -260,18 +260,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -281,18 +281,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -327,22 +327,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -350,22 +350,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -373,22 +373,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -396,22 +396,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -423,18 +423,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -444,18 +444,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -465,17 +465,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 4) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -488,18 +488,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -509,18 +509,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -556,22 +556,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -579,22 +579,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -602,22 +602,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -625,22 +625,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -648,22 +648,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -675,18 +675,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 12 Execution mode: llap @@ -696,18 +696,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -717,18 +717,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -738,17 +738,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -761,18 +761,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -782,18 +782,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -829,22 +829,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -852,22 +852,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -875,22 +875,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -898,22 +898,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -921,22 +921,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -948,18 +948,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 12 Execution mode: llap @@ -969,18 +969,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -990,18 +990,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -1011,17 +1011,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1034,18 +1034,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -1055,18 +1055,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -1102,22 +1102,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1125,22 +1125,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1148,22 +1148,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1171,22 +1171,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1194,22 +1194,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1221,18 +1221,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 12 Execution mode: llap @@ -1242,18 +1242,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -1263,18 +1263,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -1284,17 +1284,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1307,18 +1307,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -1328,18 +1328,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -1373,22 +1373,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1396,22 +1396,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1419,22 +1419,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1446,18 +1446,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -1467,17 +1467,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 3) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1490,18 +1490,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -1511,18 +1511,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -1556,22 +1556,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1579,22 +1579,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1602,22 +1602,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1629,18 +1629,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col2), count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 4 Execution mode: llap @@ -1650,24 +1650,24 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col3 = 3) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: bigint), _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE function name: UDTFReplicateRows Select Operator expressions: col1 (type: int), col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1680,18 +1680,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col2), count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 8 Execution mode: llap @@ -1701,18 +1701,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col2), count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Union 3 Vertex: Union 3 @@ -1747,22 +1747,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Select Operator expressions: key (type: int), value (type: int) @@ -1786,22 +1786,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1864,18 +1864,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 7 Execution mode: llap @@ -1919,18 +1919,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/join1.q.out b/ql/src/test/results/clientpositive/llap/join1.q.out index d79a405a41..ab775a28cf 100644 --- a/ql/src/test/results/clientpositive/llap/join1.q.out +++ b/ql/src/test/results/clientpositive/llap/join1.q.out @@ -19,13 +19,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -47,7 +49,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -90,6 +92,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -107,6 +137,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out index c226eed126..f4b3e37934 100644 --- a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out @@ -31,14 +31,16 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -58,13 +60,13 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 3 => 25 + Estimated key counts: Map 4 => 25 keys: 0 _col0 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 58 Data size: 10266 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -131,7 +133,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -207,7 +209,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 4 + Map 5 Map Operator Tree: TableScan alias: y @@ -332,6 +334,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 141 Data size: 37788 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -368,6 +417,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -503,15 +560,17 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -531,13 +590,13 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 4 => 25 + Estimated key counts: Map 5 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 4 + 1 Map 5 Position of Big Table: 0 Statistics: Num rows: 44 Data size: 11616 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -602,7 +661,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 4 + Map 5 Map Operator Tree: TableScan alias: z @@ -678,7 +737,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [z] - Map 5 + Map 6 Map Operator Tree: TableScan alias: w @@ -753,7 +812,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [w] - Map 6 + Map 7 Map Operator Tree: TableScan alias: y @@ -879,7 +938,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -900,6 +959,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 248 Data size: 65968 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 4 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -913,7 +1019,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -936,6 +1042,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src w JOIN src1 x ON (x.value = w.value) @@ -1067,14 +1181,16 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 4 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1154,7 +1270,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -1171,13 +1287,13 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 4 => 25 + Estimated key counts: Map 5 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 4 + 1 Map 5 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1246,7 +1362,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 4 + Map 5 Map Operator Tree: TableScan alias: x @@ -1371,6 +1487,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1407,6 +1570,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, z.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res @@ -1540,14 +1711,16 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1627,7 +1800,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -1698,7 +1871,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 5 + Map 6 Map Operator Tree: TableScan alias: x @@ -1802,7 +1975,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -1823,7 +1996,54 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 5 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -1862,7 +2082,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -1885,6 +2105,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, z.value, res.value FROM (select x.key, x.value from src1 x LEFT OUTER JOIN src y ON (x.key = y.key)) res @@ -2030,14 +2258,16 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 4 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2059,7 +2289,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -2079,7 +2309,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -2093,7 +2323,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: x @@ -2136,6 +2366,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2153,6 +2411,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, x.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res @@ -2286,14 +2551,16 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 4 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2315,7 +2582,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -2335,7 +2602,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -2349,7 +2616,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: x @@ -2392,6 +2659,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2409,6 +2704,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, y.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res diff --git a/ql/src/test/results/clientpositive/llap/join46.q.out b/ql/src/test/results/clientpositive/llap/join46.q.out index 079acddb28..f3ea95a107 100644 --- a/ql/src/test/results/clientpositive/llap/join46.q.out +++ b/ql/src/test/results/clientpositive/llap/join46.q.out @@ -62,16 +62,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -79,16 +79,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -102,10 +102,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -169,16 +169,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -186,19 +186,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -215,10 +215,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -283,14 +283,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -298,17 +298,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -325,10 +325,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -389,16 +389,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -406,16 +406,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -429,10 +429,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -491,14 +491,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -506,14 +506,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -530,10 +530,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -603,14 +603,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -618,14 +618,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -640,10 +640,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -716,14 +716,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -731,14 +731,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -753,10 +753,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -827,14 +827,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -842,14 +842,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -864,10 +864,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -934,16 +934,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -951,16 +951,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -975,10 +975,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1044,14 +1044,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1059,14 +1059,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1081,14 +1081,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or (_col0 = _col3))} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1156,14 +1156,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1171,14 +1171,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1193,10 +1193,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1269,14 +1269,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1284,14 +1284,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1306,10 +1306,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1377,14 +1377,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1392,14 +1392,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1414,10 +1414,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1486,16 +1486,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1503,16 +1503,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1527,10 +1527,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1596,14 +1596,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1611,14 +1611,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1633,10 +1633,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1709,14 +1709,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1724,14 +1724,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1746,10 +1746,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1820,14 +1820,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1835,14 +1835,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1857,10 +1857,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1929,16 +1929,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1946,16 +1946,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1970,10 +1970,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2071,16 +2071,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) @@ -2098,16 +2098,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) @@ -2132,10 +2132,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Reducer 3 Execution mode: llap @@ -2148,10 +2148,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} - Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 64 Data size: 24448 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 64 Data size: 24448 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2167,10 +2167,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out b/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out index 4dfb09bdea..3bf26e5195 100644 --- a/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out +++ b/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out @@ -62,16 +62,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -79,16 +79,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -105,10 +105,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -168,14 +168,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -183,14 +183,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -207,10 +207,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/lineage2.q.out b/ql/src/test/results/clientpositive/llap/lineage2.q.out index 79590a7e53..97d7c39814 100644 --- a/ql/src/test/results/clientpositive/llap/lineage2.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage2.q.out @@ -36,7 +36,7 @@ PREHOOK: query: insert into table dest1 select * from src2 PREHOOK: type: QUERY PREHOOK: Input: default@src2 PREHOOK: Output: default@dest1 -{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select key k, dest1.value from dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -467,20 +467,20 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"src1.value is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"src2.value2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"src1.value is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"src2.value2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select * from src1 where length(key) > 2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 @@ -530,7 +530,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: drop table if exists dest_l1 PREHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE @@ -552,7 +552,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_l1 -{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(j.key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"j.value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(p1.key = t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p2.key = t2.key)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(j.key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"j.value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(p1.key = t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p2.key = t2.key)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(j.key), 16)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(j.value, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} PREHOOK: query: drop table if exists emp PREHOOK: type: DROPTABLE PREHOOK: query: drop table if exists dept @@ -593,7 +593,7 @@ PREHOOK: Input: default@dept PREHOOK: Input: default@emp PREHOOK: Input: default@project PREHOOK: Output: default@tgt -{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept.dept_name, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 16)","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 16)","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project.project_id, 16)","edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute_stats(default.project.project_name, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile @@ -603,7 +603,7 @@ PREHOOK: Output: default@dest_l2 PREHOOK: query: insert into dest_l2 values(0, 1, 100, 10000) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l2 -{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(values__tmp__table__1.tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(values__tmp__table__1.tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(values__tmp__table__1.tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(values__tmp__table__1.tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(values__tmp__table__1.tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(values__tmp__table__1.tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(values__tmp__table__1.tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(values__tmp__table__1.tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(values__tmp__table__1.tmp_values_col1), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(UDFToByte(values__tmp__table__1.tmp_values_col2), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(UDFToInteger(values__tmp__table__1.tmp_values_col3), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToLong(values__tmp__table__1.tmp_values_col4), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} PREHOOK: query: select * from ( select c1 + c2 x from dest_l2 union all @@ -623,7 +623,7 @@ PREHOOK: Output: default@dest_l3 PREHOOK: query: insert into dest_l3 values(0, "s1", "s2", 15) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l3 -{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(values__tmp__table__2.tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(values__tmp__table__2.tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(values__tmp__table__2.tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(values__tmp__table__2.tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(values__tmp__table__2.tmp_values_col1), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col2, 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col3, 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToInteger(values__tmp__table__2.tmp_values_col4), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: select sum(a.c1) over (partition by a.c1 order by a.id) from dest_l2 a where a.c2 != 10 @@ -702,4 +702,4 @@ from relations lateral view explode(ep1_ids) rel1 as ep1_id PREHOOK: type: QUERY PREHOOK: Input: default@relations PREHOOK: Output: default@rels_exploded -{"version":"1.0","engine":"tez","database":"default","hash":"e76d2efade744d1d5cf74fda064ba6c6","queryText":"insert into rels_exploded select identity, type,\n ep1_src_type, ep1_type, ep2_src_type, ep2_type, ep1_id, ep2_id\nfrom relations lateral view explode(ep1_ids) rel1 as ep1_id\n lateral view explode (ep2_ids) rel2 as ep2_id","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"CAST( rel1._col11 AS CHAR(32)","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"CAST( rel2._col12 AS CHAR(32)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.rels_exploded.identity"},{"id":1,"vertexType":"COLUMN","vertexId":"default.rels_exploded.type"},{"id":2,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_src_type"},{"id":3,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_type"},{"id":4,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_src_type"},{"id":5,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_type"},{"id":6,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_id"},{"id":7,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_id"},{"id":8,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":9,"vertexType":"COLUMN","vertexId":"default.relations.type"},{"id":10,"vertexType":"COLUMN","vertexId":"default.relations.ep1_src_type"},{"id":11,"vertexType":"COLUMN","vertexId":"default.relations.ep1_type"},{"id":12,"vertexType":"COLUMN","vertexId":"default.relations.ep2_src_type"},{"id":13,"vertexType":"COLUMN","vertexId":"default.relations.ep2_type"},{"id":14,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"},{"id":15,"vertexType":"COLUMN","vertexId":"default.relations.ep2_ids"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e76d2efade744d1d5cf74fda064ba6c6","queryText":"insert into rels_exploded select identity, type,\n ep1_src_type, ep1_type, ep2_src_type, ep2_type, ep1_id, ep2_id\nfrom relations lateral view explode(ep1_ids) rel1 as ep1_id\n lateral view explode (ep2_ids) rel2 as ep2_id","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"CAST( rel1._col11 AS CHAR(32)","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"CAST( rel2._col12 AS CHAR(32)","edgeType":"PROJECTION"},{"sources":[8],"targets":[0],"expression":"compute_stats(default.relations.identity, 16)","edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"expression":"compute_stats(default.relations.type, 16)","edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"expression":"compute_stats(default.relations.ep1_src_type, 16)","edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"expression":"compute_stats(default.relations.ep1_type, 16)","edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"expression":"compute_stats(default.relations.ep2_src_type, 16)","edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"expression":"compute_stats(default.relations.ep2_type, 16)","edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"compute_stats(CAST( rel1._col11 AS CHAR(32), 16)","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"compute_stats(CAST( rel2._col12 AS CHAR(32), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.rels_exploded.identity"},{"id":1,"vertexType":"COLUMN","vertexId":"default.rels_exploded.type"},{"id":2,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_src_type"},{"id":3,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_type"},{"id":4,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_src_type"},{"id":5,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_type"},{"id":6,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_id"},{"id":7,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_id"},{"id":8,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":9,"vertexType":"COLUMN","vertexId":"default.relations.type"},{"id":10,"vertexType":"COLUMN","vertexId":"default.relations.ep1_src_type"},{"id":11,"vertexType":"COLUMN","vertexId":"default.relations.ep1_type"},{"id":12,"vertexType":"COLUMN","vertexId":"default.relations.ep2_src_type"},{"id":13,"vertexType":"COLUMN","vertexId":"default.relations.ep2_type"},{"id":14,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"},{"id":15,"vertexType":"COLUMN","vertexId":"default.relations.ep2_ids"}]} diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out index 25f0439477..f9d1bb1eff 100644 --- a/ql/src/test/results/clientpositive/llap/lineage3.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out @@ -10,7 +10,7 @@ insert into table d1 select x + length(y) PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@d1 -{"version":"1.0","engine":"tez","database":"default","hash":"4c9b7b8d89403cef78668f15d393e542","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x + length(y)","edges":[{"sources":[1,2],"targets":[0],"expression":"(UDFToInteger(a.ctinyint) + length(b.cstring1))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"b.cbigint is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"4c9b7b8d89403cef78668f15d393e542","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x + length(y)","edges":[{"sources":[1,2],"targets":[0],"expression":"(UDFToInteger(a.ctinyint) + length(b.cstring1))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[1,2],"targets":[0],"expression":"compute_stats((UDFToInteger(a.ctinyint) + length(b.cstring1)), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: drop table if exists d2 PREHOOK: type: DROPTABLE PREHOOK: query: create table d2(b varchar(128)) @@ -25,7 +25,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@d1 PREHOOK: Output: default@d2 -{"version":"1.0","engine":"tez","database":"default","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t.x > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t.x > 0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(x), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: drop table if exists t PREHOOK: type: DROPTABLE PREHOOK: query: create table t as @@ -51,7 +51,7 @@ where cint is not null and cint < 0 order by cint, cs limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1@ds=today -{"version":"1.0","engine":"tez","database":"default","hash":"2b5891d094ff74e23ec6acf5b4990f45","queryText":"insert into table dest_l1 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(alltypesorc.cint < 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"2b5891d094ff74e23ec6acf5b4990f45","queryText":"insert into table dest_l1 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1,2],"expression":"(alltypesorc.cint < 0)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"compute_stats(default.alltypesorc.cint, 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"compute_stats(CAST( alltypesorc.cstring1 AS varchar(128)), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"'today'","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l1.ds"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: insert into table dest_l1 partition (ds='tomorrow') select min(cint), cast(min(cstring1) as varchar(128)) as cs from alltypesorc @@ -61,7 +61,7 @@ having min(cbigint) > 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1@ds=tomorrow -{"version":"1.0","engine":"tez","database":"default","hash":"4ad6338a8abfe3fe0342198fcbd1f11d","queryText":"insert into table dest_l1 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[2],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(alltypesorc.cboolean1 and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1],"expression":"(min(default.alltypesorc.cbigint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"4ad6338a8abfe3fe0342198fcbd1f11d","queryText":"insert into table dest_l1 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[3],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[5,3],"targets":[0,1,2],"expression":"(alltypesorc.cboolean1 and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2],"expression":"(min(default.alltypesorc.cbigint) > 10)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"compute_stats(min(default.alltypesorc.cint), 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"compute_stats(CAST( min(default.alltypesorc.cstring1) AS varchar(128)), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"'tomorrow'","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l1.ds"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: select cint, rank() over(order by cint) from alltypesorc where cint > 10 and cint < 10000 limit 10 PREHOOK: type: QUERY @@ -348,21 +348,23 @@ PREHOOK: query: insert into dest_dp1 partition (year) select first, word, year f PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp1 -{"version":"1.0","engine":"tez","database":"default","hash":"b2d38401a3281e74a003d9650df97060","queryText":"insert into dest_dp1 partition (year) select first, word, year from src_dp","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"b2d38401a3281e74a003d9650df97060","queryText":"insert into dest_dp1 partition (year) select first, word, year from src_dp","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[6],"targets":[4],"expression":"compute_stats(default.src_dp.first, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[5],"expression":"compute_stats(default.src_dp.word, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"year"},{"id":4,"vertexType":"COLUMN","vertexId":"first"},{"id":5,"vertexType":"COLUMN","vertexId":"word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} PREHOOK: query: insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp2 -{"version":"1.0","engine":"tez","database":"default","hash":"237302d8ffd62b5b71d9544b22de7770","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"237302d8ffd62b5b71d9544b22de7770","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[11],"targets":[4,5],"edgeType":"PROJECTION"},{"sources":[8],"targets":[6],"expression":"compute_stats(default.src_dp.first, 16)","edgeType":"PROJECTION"},{"sources":[9],"targets":[7],"expression":"compute_stats(default.src_dp.word, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"year"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":5,"vertexType":"COLUMN","vertexId":"month"},{"id":6,"vertexType":"COLUMN","vertexId":"first"},{"id":7,"vertexType":"COLUMN","vertexId":"word"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":9,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":10,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]} PREHOOK: query: insert into dest_dp2 partition (y=0, m) select first, word, month from src_dp where year=0 PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp2@y=0 +Result schema has 3 fields, but we don't get as many dependencies {"version":"1.0","engine":"tez","database":"default","hash":"63e990b47e7ab4eb6f2ea09dfb7453ff","queryText":"insert into dest_dp2 partition (y=0, m) select first, word, month from src_dp where year=0","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[6],"targets":[0,1,2],"expression":"(src_dp.year = 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} PREHOOK: query: insert into dest_dp3 partition (y=0, m, d) select first, word, month m, day d from src_dp where year=0 PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp3@y=0 +Result schema has 4 fields, but we don't get as many dependencies {"version":"1.0","engine":"tez","database":"default","hash":"6bf71a9d02c0612c63b6f40b15c1e8b3","queryText":"insert into dest_dp3 partition (y=0, m, d) select first, word, month m, day d from src_dp where year=0","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[8],"targets":[0,1,2,3],"expression":"(src_dp.year = 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.day"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} PREHOOK: query: drop table if exists src_dp1 PREHOOK: type: DROPTABLE @@ -385,4 +387,4 @@ PREHOOK: Output: default@dest_dp1@year=0 PREHOOK: Output: default@dest_dp2 PREHOOK: Output: default@dest_dp2@y=1 PREHOOK: Output: default@dest_dp3@y=2 -{"version":"1.0","engine":"tez","database":"default","hash":"44f16edbf35cfeaf3d4f7b0113a69b74","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w, m\ninsert into dest_dp1 partition (year=0) select f, w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(subq.col7 = 2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"44f16edbf35cfeaf3d4f7b0113a69b74","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w, m\ninsert into dest_dp1 partition (year=0) select f, w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(subq.col7 = 2)","edgeType":"PREDICATE"},{"sources":[11],"targets":[0],"expression":"compute_stats(default.src_dp.first, 16)","edgeType":"PROJECTION"},{"sources":[12],"targets":[3],"expression":"compute_stats(default.src_dp.word, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]} diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out index cdb688b139..ffdcaa4081 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out @@ -27,11 +27,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -73,6 +77,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 500 Data size: 179000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -126,6 +149,42 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -162,6 +221,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from src PREHOOK: type: QUERY @@ -204,7 +271,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 diff --git a/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out b/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out index d35501e82b..055f6f1d3b 100644 --- a/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out @@ -1630,7 +1630,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: oft - Statistics: Num rows: 12288 Data size: 5280746 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 12288 Data size: 1884148 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1640,14 +1640,14 @@ STAGE PLANS: outputColumnNames: _col1, _col6, _col7, _col10 input vertices: 1 Map 2 - Statistics: Num rows: 960 Data size: 497280 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 960 Data size: 195840 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col10 (type: tinyint), _col1 (type: int), _col6 (type: char(255)), _col7 (type: varchar(255)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 960 Data size: 3840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 960 Data size: 195840 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 960 Data size: 3840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 960 Data size: 195840 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/llap_stats.q.out b/ql/src/test/results/clientpositive/llap/llap_stats.q.out index f81ad50679..4404ad060e 100644 --- a/ql/src/test/results/clientpositive/llap/llap_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_stats.q.out @@ -108,11 +108,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: llap_stats - Statistics: Num rows: 10 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int) outputColumnNames: ctinyint, csmallint, cint - Statistics: Num rows: 10 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: compute_stats(ctinyint, 16), compute_stats(csmallint, 16) keys: cint (type: int) diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out index 6ad9af8086..91eb14dfbb 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out @@ -57,6 +57,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -64,6 +66,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -85,6 +91,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE @@ -100,8 +122,64 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1262 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1262 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -122,6 +200,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out index ce5517a54d..5eadad5c02 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out @@ -39,6 +39,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -46,6 +47,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -79,6 +81,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 912000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2524 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 2524 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -99,6 +137,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part_bucket + PREHOOK: query: insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr) select key, value, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out index b34975fedc..52b4c87d42 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out @@ -45,11 +45,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -68,8 +72,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 5408 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 5408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -90,6 +130,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part3 + PREHOOK: query: insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out index 9c4e8a891b..6370c63659 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out @@ -32,11 +32,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -55,8 +59,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 214 Data size: 124762 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 124762 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 214 Data size: 122194 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 214 Data size: 122194 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 214 Data size: 122194 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -76,6 +116,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.nzhang_part5 + PREHOOK: query: insert overwrite table nzhang_part5 partition (value) select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/llap/mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/mapjoin3.q.out index 45992144d1..b8e9ec1c06 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin3.q.out @@ -105,11 +105,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: member (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -119,14 +119,14 @@ STAGE PLANS: outputColumnNames: _col0, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col2 (type: varchar(100)), _col3 (type: varchar(100)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -137,16 +137,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: member (type: bigint), age (type: varchar(100)), age (type: varchar(100)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: varchar(100)), _col2 (type: varchar(100)) Execution mode: llap LLAP IO: no inputs diff --git a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out index efada10a2b..d4729067b7 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out @@ -62,11 +62,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -76,10 +76,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -90,16 +90,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -162,11 +162,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -179,10 +179,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -193,19 +193,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -269,11 +269,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -286,10 +286,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -300,17 +300,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -370,16 +370,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -387,11 +387,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -401,10 +401,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 0 Map 1 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -465,11 +465,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -482,10 +482,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -496,14 +496,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -572,11 +572,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -587,10 +587,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -601,14 +601,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -680,11 +680,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -695,10 +695,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -709,14 +709,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -786,11 +786,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -801,10 +801,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -815,14 +815,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -888,11 +888,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -903,10 +903,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -917,16 +917,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -993,14 +993,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1008,11 +1008,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -1023,10 +1023,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1101,14 +1101,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1116,11 +1116,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -1131,10 +1131,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1204,14 +1204,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1219,11 +1219,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -1234,10 +1234,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1308,16 +1308,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1325,11 +1325,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -1340,10 +1340,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1411,14 +1411,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1426,14 +1426,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1448,10 +1448,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1524,14 +1524,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1539,14 +1539,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1561,10 +1561,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1635,14 +1635,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1650,14 +1650,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1672,10 +1672,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1744,16 +1744,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1761,16 +1761,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1785,10 +1785,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1886,16 +1886,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) @@ -1922,11 +1922,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -1937,10 +1937,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1948,16 +1948,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1972,10 +1972,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} - Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 64 Data size: 24448 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 64 Data size: 24448 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out index 38b8f92701..eb638e9907 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out @@ -95,14 +95,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: dec is not null (type: boolean) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: dec (type: decimal(4,2)) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -112,11 +112,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1126 Data size: 252224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(4,2)) sort order: + - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1126 Data size: 252224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(4,0)) Execution mode: llap LLAP IO: all inputs @@ -124,19 +124,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: dec is not null (type: boolean) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: dec (type: decimal(4,0)) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(6,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(6,2)) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -145,10 +145,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(4,0)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1126 Data size: 252224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1126 Data size: 252224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out index fdbca8af8d..294cea66a8 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out @@ -62,11 +62,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -79,10 +79,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -93,16 +93,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -161,11 +161,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -178,10 +178,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -192,14 +192,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs diff --git a/ql/src/test/results/clientpositive/llap/mapreduce1.q.out b/ql/src/test/results/clientpositive/llap/mapreduce1.q.out index 37f92d9f20..881bb79602 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce1.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce1.q.out @@ -27,6 +27,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -34,6 +35,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -73,6 +75,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -90,6 +120,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/llap/mapreduce2.q.out b/ql/src/test/results/clientpositive/llap/mapreduce2.q.out index 71bbb7e612..53fa2419be 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce2.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce2.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -32,6 +33,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -70,6 +72,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -87,6 +117,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/llap/merge1.q.out b/ql/src/test/results/clientpositive/llap/merge1.q.out index 4bcb72877e..76e31dd739 100644 --- a/ql/src/test/results/clientpositive/llap/merge1.q.out +++ b/ql/src/test/results/clientpositive/llap/merge1.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -26,6 +27,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 205 Data size: 1640 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -89,6 +119,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.dest1 + PREHOOK: query: insert overwrite table dest1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -473,31 +510,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_src - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -515,6 +584,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + PREHOOK: query: insert overwrite table dest1 select key from test_src PREHOOK: type: QUERY PREHOOK: Input: default@test_src @@ -539,31 +615,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_src - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -581,6 +689,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + PREHOOK: query: insert overwrite table dest1 select key from test_src PREHOOK: type: QUERY PREHOOK: Input: default@test_src diff --git a/ql/src/test/results/clientpositive/llap/merge2.q.out b/ql/src/test/results/clientpositive/llap/merge2.q.out index aa0567b6f8..144acc2fe0 100644 --- a/ql/src/test/results/clientpositive/llap/merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/merge2.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -26,6 +27,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 205 Data size: 1640 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -89,6 +119,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.test1 + PREHOOK: query: insert overwrite table test1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -473,31 +510,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_src - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -515,6 +584,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + PREHOOK: query: insert overwrite table test1 select key from test_src PREHOOK: type: QUERY PREHOOK: Input: default@test_src @@ -539,31 +615,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_src - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -581,6 +689,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + PREHOOK: query: insert overwrite table test1 select key from test_src PREHOOK: type: QUERY PREHOOK: Input: default@test_src diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out index 10fb45d284..c9db7aae09 100644 --- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -293,19 +293,19 @@ STAGE PLANS: TableScan alias: a filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 @@ -313,19 +313,19 @@ STAGE PLANS: TableScan alias: b filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -337,15 +337,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -354,10 +354,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1382,32 +1382,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1419,15 +1419,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1436,10 +1436,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1493,32 +1493,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1530,15 +1530,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1547,10 +1547,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1604,32 +1604,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1641,15 +1641,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 742 Data size: 5936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1658,10 +1658,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1704,9 +1704,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Reducer 7 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 8 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -1714,41 +1716,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_13_c_value_min) AND DynamicValue(RS_13_c_value_max) and in_bloom_filter(value, DynamicValue(RS_13_c_value_bloom_filter)))) (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + filterExpr: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_10_c_value_min) AND DynamicValue(RS_10_c_value_max) and in_bloom_filter(value, DynamicValue(RS_10_c_value_bloom_filter)))) (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_13_c_value_min) AND DynamicValue(RS_13_c_value_max) and in_bloom_filter(value, DynamicValue(RS_13_c_value_bloom_filter)))) (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_10_c_value_min) AND DynamicValue(RS_10_c_value_max) and in_bloom_filter(value, DynamicValue(RS_10_c_value_bloom_filter)))) (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Map 6 @@ -1784,6 +1766,26 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: b + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and (key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1791,15 +1793,28 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=54) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1807,17 +1822,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 61710 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 137 Data size: 1096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -1826,14 +1841,26 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=54) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1892,19 +1919,19 @@ STAGE PLANS: TableScan alias: a filterExpr: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 @@ -1912,19 +1939,19 @@ STAGE PLANS: TableScan alias: b filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1936,15 +1963,15 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col1 (type: string) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 565 Data size: 4520 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1953,10 +1980,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2015,19 +2042,19 @@ STAGE PLANS: TableScan alias: s1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 6 @@ -2035,19 +2062,19 @@ STAGE PLANS: TableScan alias: s3 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 7 @@ -2055,19 +2082,19 @@ STAGE PLANS: TableScan alias: s2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 742 Data size: 2968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 8 @@ -2075,19 +2102,19 @@ STAGE PLANS: TableScan alias: b filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -2100,12 +2127,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 266 Data size: 27152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 742 Data size: 2968 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2115,15 +2142,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 558 Data size: 57019 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1892 Data size: 15136 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 5 Execution mode: vectorized, llap @@ -2132,10 +2159,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2171,19 +2198,19 @@ STAGE PLANS: TableScan alias: a filterExpr: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 @@ -2191,19 +2218,19 @@ STAGE PLANS: TableScan alias: b filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -2215,15 +2242,15 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col1 (type: string) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 565 Data size: 4520 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -2232,10 +2259,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2276,9 +2303,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Reducer 7 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 8 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2286,41 +2315,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_13_c_value_min) AND DynamicValue(RS_13_c_value_max) and in_bloom_filter(value, DynamicValue(RS_13_c_value_bloom_filter)))) (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + filterExpr: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_10_c_value_min) AND DynamicValue(RS_10_c_value_max) and in_bloom_filter(value, DynamicValue(RS_10_c_value_bloom_filter)))) (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_13_c_value_min) AND DynamicValue(RS_13_c_value_max) and in_bloom_filter(value, DynamicValue(RS_13_c_value_bloom_filter)))) (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_10_c_value_min) AND DynamicValue(RS_10_c_value_max) and in_bloom_filter(value, DynamicValue(RS_10_c_value_bloom_filter)))) (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Map 6 @@ -2356,6 +2365,26 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: b + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and (key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2363,15 +2392,28 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=54) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2379,17 +2421,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 61710 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 137 Data size: 1096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -2398,14 +2440,26 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=54) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -2474,19 +2528,19 @@ STAGE PLANS: TableScan alias: s1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 6 @@ -2494,19 +2548,19 @@ STAGE PLANS: TableScan alias: s3 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 7 @@ -2514,19 +2568,19 @@ STAGE PLANS: TableScan alias: s2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 742 Data size: 2968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 8 @@ -2534,19 +2588,19 @@ STAGE PLANS: TableScan alias: b filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -2559,12 +2613,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 266 Data size: 27152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 742 Data size: 2968 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2574,15 +2628,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 558 Data size: 57019 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1892 Data size: 15136 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 5 Execution mode: vectorized, llap @@ -2591,10 +2645,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2646,18 +2700,18 @@ STAGE PLANS: TableScan alias: t1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 5 @@ -2665,18 +2719,18 @@ STAGE PLANS: TableScan alias: t2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -2685,12 +2739,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2700,15 +2754,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -2717,10 +2771,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2731,12 +2785,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out index 81483f1afb..f91f5718f9 100644 --- a/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out @@ -183,56 +183,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -243,56 +199,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 1054697 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 1054697 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -303,60 +215,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -367,60 +231,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 1054697 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 1054697 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -646,19 +462,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 1054697 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9489 Data size: 379560 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ts (type: timestamp) outputColumnNames: ts - Statistics: Num rows: 9489 Data size: 1054697 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9489 Data size: 379560 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(ts) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -669,10 +485,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out b/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out index 6d756a822e..d53af139b4 100644 --- a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out @@ -21,6 +21,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: create table smallTbl2(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -44,6 +45,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: create table smallTbl3(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -67,6 +69,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: create table smallTbl4(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -90,6 +93,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: create table bigTbl(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -157,6 +161,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, @@ -192,14 +197,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -209,11 +214,11 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 3 - Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 22113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 22113 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -222,15 +227,15 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 4 - Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -238,38 +243,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -279,10 +284,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -355,14 +360,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -372,11 +377,11 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 3 - Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 22113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 22113 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -385,15 +390,15 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 4 - Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -401,38 +406,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -442,10 +447,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -520,14 +525,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -537,11 +542,11 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 3 - Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 22113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 22113 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -551,18 +556,18 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 18700 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -570,38 +575,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -613,14 +618,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3025 Data size: 32137 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 3025 Data size: 32137 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3025 Data size: 32137 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -742,6 +747,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), @@ -825,14 +831,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key1 is not null and value is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key1 (type: string), key2 (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -842,11 +848,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 85050 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 107163 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -856,11 +862,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 4 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 117700 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 95700 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -870,11 +876,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 input vertices: 1 Map 5 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 136400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 114400 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -884,19 +890,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 input vertices: 1 Map 6 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -904,38 +910,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -943,38 +949,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl3 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: smalltbl4 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -984,10 +990,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1160,14 +1166,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key1 is not null and value is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key1 (type: string), key2 (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1177,11 +1183,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 85050 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 107163 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1191,11 +1197,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 4 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 117700 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 95700 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1205,11 +1211,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 input vertices: 1 Map 5 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 136400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 114400 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1219,19 +1225,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 input vertices: 1 Map 6 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1239,38 +1245,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1278,38 +1284,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl3 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: smalltbl4 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1319,10 +1325,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1487,177 +1493,208 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key1 is not null and value is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key1 (type: string), key2 (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 1 Map 4 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - input vertices: - 1 Map 5 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - input vertices: - 1 Map 6 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 10 + Map Operator Tree: + TableScan + alias: smalltbl4 + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 7 Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 8 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 9 Map Operator Tree: TableScan alias: smalltbl3 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: smalltbl4 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 243 Data size: 85050 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 243 Data size: 107163 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 243 Data size: 107163 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 220 Data size: 117700 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 220 Data size: 95700 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 220 Data size: 95700 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 220 Data size: 136400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 220 Data size: 114400 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 220 Data size: 114400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1833,19 +1870,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key1 is not null and value is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key1 (type: string), key2 (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1853,57 +1890,57 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl4 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 8 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1911,19 +1948,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl3 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1936,16 +1973,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 85050 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 107163 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 107163 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reducer 3 Execution mode: llap @@ -1957,16 +1994,16 @@ STAGE PLANS: 0 _col3 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 117700 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 95700 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 95700 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Reducer 4 Execution mode: llap @@ -1978,16 +2015,16 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 136400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 114400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 114400 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) Reducer 5 Execution mode: llap @@ -1999,19 +2036,19 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Reducer 6 Execution mode: llap @@ -2020,10 +2057,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out b/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out index b4b0e93c82..4472dff8b6 100644 --- a/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out +++ b/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out @@ -1688,6 +1688,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: INSERT OVERWRITE TABLE part_table PARTITION (partitionId=2) SELECT key, value FROM src1 ORDER BY key, value PREHOOK: type: QUERY @@ -1704,6 +1705,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: EXPLAIN SELECT count(*) FROM part_table x JOIN src1 y ON (x.key = y.key) @@ -1729,14 +1731,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - Statistics: Num rows: 125 Data size: 2261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 125 Data size: 2261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 2261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1745,15 +1747,15 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 137 Data size: 2487 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 72 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1783,10 +1785,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out b/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out index 0f4f803369..39feaec783 100644 --- a/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out @@ -40,21 +40,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee - Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: department_id (type: int), gender (type: varchar(10)), education_level (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 36 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: int) sort order: ++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 36 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -64,19 +64,19 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: varchar(10)), KEY._col2 (type: int), KEY._col3 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 18 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CASE WHEN (((_col3 = 3) and _col0 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 5) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 6) and _col2 is not null)) THEN (1) ELSE (null) END (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 18 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), count(_col1), count(_col2) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -85,10 +85,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -181,21 +181,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee - Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: gender (type: varchar(10)), department_id (type: int), education_level (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 330 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), _col3 (type: int) sort order: ++++ Map-reduce partition columns: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 60 Data size: 330 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -205,19 +205,19 @@ STAGE PLANS: keys: KEY._col0 (type: varchar(10)), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 165 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CASE WHEN (((_col3 = 3) and _col0 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 5) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 6) and _col2 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN ((_col3 = 4)) THEN (1) ELSE (null) END (type: int), CASE WHEN ((_col3 = 0)) THEN (1) ELSE (null) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 30 Data size: 165 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), count(_col1), count(_col2), count(_col3), count(_col4) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: llap @@ -226,14 +226,14 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: bigint), _col0 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/multi_insert.q.out b/ql/src/test/results/clientpositive/llap/multi_insert.q.out index 58fc759f26..c4a766898d 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert.q.out @@ -29,6 +29,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -36,6 +38,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -57,6 +63,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -72,8 +91,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -91,6 +153,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -172,6 +248,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -179,6 +257,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -200,6 +282,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -215,8 +310,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -234,6 +372,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -315,6 +467,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -322,6 +476,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -343,6 +501,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -358,8 +529,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -377,6 +591,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -458,6 +686,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -465,6 +695,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -486,6 +720,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -501,8 +748,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -520,6 +810,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -601,6 +905,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -610,6 +916,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -648,6 +956,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -664,6 +985,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -681,6 +1045,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -755,6 +1133,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -764,6 +1144,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -802,6 +1184,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -818,6 +1213,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -835,6 +1273,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -909,6 +1361,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -918,6 +1372,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -956,6 +1412,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -972,6 +1441,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -989,6 +1501,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1063,6 +1589,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1072,6 +1600,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1110,6 +1640,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1126,6 +1669,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1143,6 +1729,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1217,6 +1817,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1226,7 +1828,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1249,6 +1853,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1260,9 +1877,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: src @@ -1282,6 +1912,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1293,8 +1936,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -1314,6 +2000,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1414,6 +2114,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1423,7 +2125,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1446,6 +2150,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1457,9 +2174,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: src @@ -1479,6 +2209,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1490,8 +2233,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -1511,6 +2297,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1611,6 +2411,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1620,7 +2422,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1643,6 +2447,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1654,9 +2471,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: src @@ -1676,6 +2506,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1687,8 +2530,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -1708,6 +2594,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1808,6 +2708,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1817,7 +2719,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1840,6 +2744,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1851,9 +2768,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: src @@ -1873,6 +2803,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1884,8 +2827,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -1905,6 +2891,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out index e5497d5a79..49b70a3d17 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out @@ -49,6 +49,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -56,6 +58,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -83,6 +89,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 @@ -105,6 +124,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Lateral View Forward Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -126,6 +158,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: array((key + 3),(key + 4)) (type: array) outputColumnNames: _col0 @@ -148,8 +193,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -167,6 +255,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + Stage: Stage-1 Move Operator tables: @@ -269,6 +371,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -278,7 +382,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -395,10 +501,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -416,6 +550,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -433,6 +595,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + Stage: Stage-1 Move Operator tables: @@ -517,6 +693,9 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-5, Stage-6, Stage-7 + Stage-10 depends on stages: Stage-5, Stage-6, Stage-7 Stage-1 depends on stages: Stage-4 Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -528,7 +707,10 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -611,9 +793,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: Forward Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -637,6 +847,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE @@ -658,6 +881,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -675,6 +941,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 + Stage: Stage-1 Move Operator tables: @@ -786,6 +1073,9 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-5, Stage-6, Stage-7 + Stage-10 depends on stages: Stage-5, Stage-6, Stage-7 Stage-1 depends on stages: Stage-4 Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -797,8 +1087,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -926,10 +1219,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: double) mode: mergepartial @@ -947,7 +1268,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -968,6 +1317,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -985,6 +1362,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 + Stage: Stage-1 Move Operator tables: @@ -1136,6 +1534,10 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9 + Stage-12 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9 + Stage-13 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9 Stage-1 depends on stages: Stage-5 Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-5 @@ -1149,8 +1551,12 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1271,10 +1677,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -1292,7 +1726,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Forward @@ -1318,6 +1780,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col1:0._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE @@ -1339,6 +1814,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -1356,6 +1874,34 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv4 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/orc_analyze.q.out b/ql/src/test/results/clientpositive/llap/orc_analyze.q.out index 1cc9d61b48..1c6996463a 100644 --- a/ql/src/test/results/clientpositive/llap/orc_analyze.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_analyze.q.out @@ -281,7 +281,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 52600 @@ -728,7 +728,7 @@ Database: default Table: orc_create_people #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}} numFiles 1 numRows 50 rawDataSize 21950 @@ -771,7 +771,7 @@ Database: default Table: orc_create_people #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}} numFiles 1 numRows 50 rawDataSize 22050 @@ -1230,7 +1230,7 @@ Database: default Table: orc_create_people #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}} numFiles 4 numRows 50 rawDataSize 21975 @@ -1273,7 +1273,7 @@ Database: default Table: orc_create_people #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}} numFiles 4 numRows 50 rawDataSize 22043 diff --git a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out index ba29491001..18573a65cd 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out @@ -55,11 +55,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -78,8 +82,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -100,6 +140,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -136,6 +183,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -144,6 +192,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -162,8 +213,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -193,6 +280,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-4 Tez #### A masked pattern was here #### @@ -265,6 +359,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -273,6 +368,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -291,8 +389,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -322,6 +456,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-4 Tez #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out index dd5d1cbbe7..08da6ac13b 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out @@ -55,11 +55,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -78,8 +82,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -100,6 +140,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -136,6 +183,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -144,6 +192,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -162,8 +213,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -193,6 +280,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-4 Tez #### A masked pattern was here #### @@ -265,6 +359,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -273,6 +368,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -291,8 +389,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -322,6 +456,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-4 Tez #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out index c38852a95b..6c3bddf096 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out @@ -29,11 +29,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -52,8 +56,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 710500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 710500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 706500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 706500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 706500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -75,6 +115,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge2a + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three) SELECT key, value, PMOD(HASH(key), 10) as two, PMOD(HASH(value), 10) as three diff --git a/ql/src/test/results/clientpositive/llap/orc_merge3.q.out b/ql/src/test/results/clientpositive/llap/orc_merge3.q.out index fd79d9d05f..735ff50313 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge3.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge3.q.out @@ -59,31 +59,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: orcfile_merge3a - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -101,6 +133,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge3b SELECT key, value FROM orcfile_merge3a PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/orc_merge4.q.out b/ql/src/test/results/clientpositive/llap/orc_merge4.q.out index 6e2617b15d..dcb5b18ea6 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge4.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge4.q.out @@ -77,31 +77,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: orcfile_merge3a - Statistics: Num rows: 1000 Data size: 102000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 102000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 102000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -119,6 +151,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge3b SELECT key, value FROM orcfile_merge3a PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/orc_merge5.q.out b/ql/src/test/results/clientpositive/llap/orc_merge5.q.out index 47096bba65..6688f3b888 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge5.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge5.q.out @@ -31,11 +31,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -58,8 +62,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -77,6 +109,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -122,6 +161,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -130,6 +170,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -152,8 +195,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -180,6 +251,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + Stage: Stage-4 Tez #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/orc_merge6.q.out b/ql/src/test/results/clientpositive/llap/orc_merge6.q.out index 3969a9631c..31ef896548 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge6.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge6.q.out @@ -31,11 +31,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -58,8 +62,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -80,6 +120,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -167,6 +214,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -175,6 +223,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -197,8 +248,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -228,6 +315,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + Stage: Stage-4 Tez #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out index d86d2dcf2c..463ba7bfdd 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out @@ -31,11 +31,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -54,8 +58,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -75,6 +115,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -201,6 +248,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -209,6 +257,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -227,8 +278,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -257,6 +344,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + Stage: Stage-4 Tez #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out index ba29491001..18573a65cd 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out @@ -55,11 +55,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -78,8 +82,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -100,6 +140,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -136,6 +183,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -144,6 +192,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -162,8 +213,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -193,6 +280,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-4 Tez #### A masked pattern was here #### @@ -265,6 +359,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -273,6 +368,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -291,8 +389,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -322,6 +456,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-4 Tez #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out index e5101f449d..ba18346a8b 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out @@ -31,11 +31,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -57,8 +61,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -76,6 +108,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out index 6fcb45a600..c3ef44663c 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out @@ -31,11 +31,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -54,8 +58,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -75,6 +115,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 diff --git a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out index 8c6f97e613..9c523f345d 100644 --- a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out @@ -133,19 +133,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -156,10 +156,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -191,19 +191,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -214,10 +214,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -321,22 +321,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -347,10 +347,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -389,22 +389,22 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -415,10 +415,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -589,18 +589,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -609,10 +609,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -655,18 +655,18 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -675,10 +675,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -786,18 +786,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -808,13 +808,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -865,18 +865,18 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -887,13 +887,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1010,18 +1010,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1032,14 +1032,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1048,13 +1048,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1110,18 +1110,18 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1132,14 +1132,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1148,13 +1148,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/parallel.q.out b/ql/src/test/results/clientpositive/llap/parallel.q.out index 7dba1226a4..95ed8b813a 100644 --- a/ql/src/test/results/clientpositive/llap/parallel.q.out +++ b/ql/src/test/results/clientpositive/llap/parallel.q.out @@ -29,6 +29,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -39,6 +41,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -93,6 +97,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_a + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -106,6 +123,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -123,6 +183,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_a + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_b + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out index aecbcfdfe4..c3f6850cc1 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out @@ -125,19 +125,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -148,10 +148,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -183,19 +183,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -206,10 +206,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -527,18 +527,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -547,10 +547,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -593,18 +593,18 @@ STAGE PLANS: TableScan alias: tbl_pred filterExpr: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -613,10 +613,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -724,18 +724,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -746,13 +746,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -803,18 +803,18 @@ STAGE PLANS: TableScan alias: tbl_pred filterExpr: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -825,13 +825,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -993,18 +993,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1015,14 +1015,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1031,13 +1031,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1093,18 +1093,18 @@ STAGE PLANS: TableScan alias: tbl_pred filterExpr: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1115,14 +1115,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1131,13 +1131,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1205,18 +1205,18 @@ STAGE PLANS: TableScan alias: tbl_pred filterExpr: ((f < 123.2) and (f > 1.92) and (f >= 9.99) and f BETWEEN 1.92 AND 123.2 and (i < 67627) and (i > 60627) and (i >= 60626) and i BETWEEN 60626 AND 67627 and (b < 4294967861) and (b > 4294967261) and (b >= 4294967260) and b BETWEEN 4294967261 AND 4294967861) (type: boolean) - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 16784 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((f < 123.2) and (f > 1.92) and (f >= 9.99) and f BETWEEN 1.92 AND 123.2 and (i < 67627) and (i > 60627) and (i >= 60626) and i BETWEEN 60626 AND 67627 and (b < 4294967861) and (b > 4294967261) and (b >= 4294967260) and b BETWEEN 4294967261 AND 4294967861) (type: boolean) - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: f (type: float), i (type: int), b (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: bigint) Execution mode: llap @@ -1227,14 +1227,14 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: int), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: bigint) Reducer 3 @@ -1243,13 +1243,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: int), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/ppd_union_view.q.out b/ql/src/test/results/clientpositive/llap/ppd_union_view.q.out index 543d17630a..8d328b5a59 100644 --- a/ql/src/test/results/clientpositive/llap/ppd_union_view.q.out +++ b/ql/src/test/results/clientpositive/llap/ppd_union_view.q.out @@ -166,13 +166,13 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), '2011-10-13' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -197,22 +197,22 @@ STAGE PLANS: alias: t1_old properties: insideView TRUE - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 181 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: keymap is not null (type: boolean) - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 181 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: keymap (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col1 (type: string) auto parallelism: true @@ -229,7 +229,7 @@ STAGE PLANS: partition values: ds 2011-10-13 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"keymap":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns keymap,value @@ -276,22 +276,22 @@ STAGE PLANS: alias: t1_mapping properties: insideView TRUE - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 179 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: keymap is not null (type: boolean) - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 179 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), keymap (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col0 (type: string) auto parallelism: true @@ -308,7 +308,7 @@ STAGE PLANS: partition values: ds 2011-10-13 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","keymap":"true"}} bucket_count -1 column.name.delimiter , columns key,keymap @@ -361,21 +361,21 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col1, _col3 Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), '2011-10-13' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -462,22 +462,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1_new - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), '2011-10-15' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -507,7 +507,7 @@ STAGE PLANS: partition values: ds 2011-10-15 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -622,13 +622,13 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), '2011-10-15' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/ptf.q.out b/ql/src/test/results/clientpositive/llap/ptf.q.out index fbaf1e6474..8448ed26ae 100644 --- a/ql/src/test/results/clientpositive/llap/ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/ptf.q.out @@ -2956,6 +2956,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -2966,8 +2968,10 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3069,9 +3073,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: double) + outputColumnNames: p_mfgr, p_name, p_size, r, dr, s + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(p_mfgr, 16), compute_stats(p_name, 16), compute_stats(p_size, 16), compute_stats(r, 16), compute_stats(dr, 16), compute_stats(s, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2888 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2888 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 @@ -3106,7 +3138,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: sum_window_0 (type: bigint), _col5 (type: int) - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3166,6 +3198,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: double), _col7 (type: int) + outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(p_mfgr, 16), compute_stats(p_name, 16), compute_stats(p_size, 16), compute_stats(s2, 16), compute_stats(r, 16), compute_stats(dr, 16), compute_stats(cud, 16), compute_stats(fv1, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -3183,6 +3243,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, r, dr, s + Column Types: string, string, int, int, int, double + Table: default.part_4 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Column Types: string, string, int, int, int, int, double, int + Table: default.part_5 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out b/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out index 7d1f52657c..ac09b63d4c 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out @@ -68,14 +68,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: rcfile_createas1a - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string), (hash(key) pmod 50) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out index 2bb8e8ab1d..edbe3d17b4 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out @@ -29,11 +29,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -52,8 +56,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.rcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 710500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 710500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 706500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 706500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 706500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -75,6 +115,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.rcfile_merge2a + PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge2a PARTITION (one='1', two, three) SELECT key, value, PMOD(HASH(key), 10) as two, PMOD(HASH(value), 10) as three diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out index 11b0b48189..8a81627192 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out @@ -59,31 +59,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: rcfile_merge3a - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.rcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -101,6 +133,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.rcfile_merge3b + PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge3b SELECT key, value FROM rcfile_merge3a PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out index 443014b99e..ac85b1972e 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out @@ -59,31 +59,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: rcfile_merge3a - Statistics: Num rows: 1000 Data size: 17624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 17624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 17624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.rcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -101,6 +133,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.rcfile_merge3b + PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge3b SELECT key, value FROM rcfile_merge3a PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out b/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out index d74e4c7c78..68cb4b4f68 100644 --- a/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out +++ b/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -141,6 +142,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -178,6 +214,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.bucket5_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket5_1 select * from src cluster by key PREHOOK: type: QUERY @@ -261,6 +305,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -268,6 +313,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -356,6 +402,61 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: string), _col6 (type: string), '2010-03-29' (type: string) + outputColumnNames: aid, bid, t, ctime, etime, l, et, ds + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(aid, 16), compute_stats(bid, 16), compute_stats(t, 16), compute_stats(ctime, 16), compute_stats(etime, 16), compute_stats(l, 16), compute_stats(et, 16) + keys: '2010-03-29' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3506 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: '2010-03-29' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '2010-03-29' (type: string) + Statistics: Num rows: 1 Data size: 3506 Basic stats: COMPLETE Column stats: PARTIAL + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + auto parallelism: true + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + keys: '2010-03-29' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3454 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), '2010-03-29' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3454 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 3454 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types struct:struct:struct:struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -391,3 +492,11 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: aid, bid, t, ctime, etime, l, et + Column Types: string, string, int, string, bigint, string, string + Table: default.complex_tbl_1 + Is Table Level Stats: false + diff --git a/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out b/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out index e5b8d11c58..2955ee67b2 100644 --- a/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out @@ -39,22 +39,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_distinct_test - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), key (type: int), name (type: int) outputColumnNames: id, key, name - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT key), count(DISTINCT name) keys: id (type: int), key (type: int), name (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -65,10 +65,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -118,21 +118,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_distinct_test - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), key (type: int), name (type: int) outputColumnNames: id, key, name - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: id (type: int), key (type: int), name (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -142,18 +142,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT _col1), count(DISTINCT _col2) keys: _col0 (type: int), _col1 (type: int), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -162,10 +162,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -215,21 +215,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_distinct_test - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), key (type: int), name (type: int) outputColumnNames: id, key, name - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: id (type: int), key (type: int), name (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -239,18 +239,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT _col2), count(DISTINCT _col1) keys: _col0 (type: int), _col2 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -259,10 +259,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -312,21 +312,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_distinct_test - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), key (type: int), name (type: int) outputColumnNames: id, key, name - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: id (type: int), key (type: int), name (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -336,18 +336,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT _col1), count(DISTINCT _col2) keys: _col0 (type: int), _col1 (type: int), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -356,10 +356,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -409,21 +409,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_distinct_test - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), key (type: int), name (type: int) outputColumnNames: id, key, name - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: id (type: int), key (type: int), name (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -433,18 +433,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT _col2), count(DISTINCT _col1) keys: _col0 (type: int), _col2 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -453,10 +453,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/sample1.q.out b/ql/src/test/results/clientpositive/llap/sample1.q.out index 1a7fb3254e..e0610e3287 100644 --- a/ql/src/test/results/clientpositive/llap/sample1.q.out +++ b/ql/src/test/results/clientpositive/llap/sample1.q.out @@ -21,11 +21,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -73,6 +77,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, dt, hr + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(dt, 16), compute_stats(hr, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -128,6 +148,37 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [s] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -164,6 +215,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, dt, hr + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON rand()) s WHERE s.ds='2008-04-08' and s.hr='11' diff --git a/ql/src/test/results/clientpositive/llap/sample10.q.out b/ql/src/test/results/clientpositive/llap/sample10.q.out index a3da78754f..8552a3a982 100644 --- a/ql/src/test/results/clientpositive/llap/sample10.q.out +++ b/ql/src/test/results/clientpositive/llap/sample10.q.out @@ -56,29 +56,29 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpartbucket - Statistics: Num rows: 40 Data size: 7600 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 40 Data size: 10760 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: true predicate: (((hash(key) & 2147483647) % 4) = 0) (type: boolean) sampleDesc: BUCKET 1 OUT OF 4 - Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 5380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) outputColumnNames: ds - Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 5380 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: ds (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: true @@ -96,7 +96,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -146,7 +146,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -196,7 +196,7 @@ STAGE PLANS: ds 2008-04-09 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -246,7 +246,7 @@ STAGE PLANS: ds 2008-04-09 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -301,12 +301,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false @@ -317,13 +317,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_complex.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_complex.q.out index 22c7745345..c284b5c1b8 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_complex.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_complex.q.out @@ -431,14 +431,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_add_various_various_struct2 - Statistics: Num rows: 8 Data size: 4912 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out index d0cafaa2db..7d5d07302d 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out @@ -437,14 +437,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_add_various_various_struct2 - Statistics: Num rows: 8 Data size: 4912 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_complex.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_complex.q.out index 12c7db4af8..591e54a6e2 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_complex.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_complex.q.out @@ -431,14 +431,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_add_various_various_struct2 - Statistics: Num rows: 8 Data size: 939 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out index 97270fcbee..89a2554c07 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out @@ -437,14 +437,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_add_various_various_struct2 - Statistics: Num rows: 8 Data size: 939 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out index b35bcccdde..e8c3fe0d12 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out @@ -437,14 +437,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_add_various_various_struct2 - Statistics: Num rows: 8 Data size: 939 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/skewjoin.q.out b/ql/src/test/results/clientpositive/llap/skewjoin.q.out index dc79b26020..9a20074e2a 100644 --- a/ql/src/test/results/clientpositive/llap/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/skewjoin.q.out @@ -83,13 +83,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -111,7 +113,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -154,6 +156,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -171,6 +201,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/skewjoinopt15.q.out b/ql/src/test/results/clientpositive/llap/skewjoinopt15.q.out index 8aeb02959a..d9d020151b 100644 --- a/ql/src/test/results/clientpositive/llap/skewjoinopt15.q.out +++ b/ql/src/test/results/clientpositive/llap/skewjoinopt15.q.out @@ -88,19 +88,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -108,19 +108,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -134,10 +134,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -189,16 +189,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -206,16 +206,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -229,10 +229,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -287,38 +287,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -330,15 +330,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -347,10 +347,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -396,32 +396,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -433,15 +433,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -450,10 +450,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/smb_cache.q.out b/ql/src/test/results/clientpositive/llap/smb_cache.q.out index 932b21105b..05162f6a3c 100644 --- a/ql/src/test/results/clientpositive/llap/smb_cache.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_cache.q.out @@ -211,26 +211,26 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=579 width=8) + Merge Join Operator [MERGEJOIN_15] (rows=123 width=8) Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col0 - Select Operator [SEL_2] (rows=130 width=8) + Select Operator [SEL_2] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_13] (rows=130 width=8) + Filter Operator [FIL_13] (rows=130 width=4) predicate:userid is not null - TableScan [TS_0] (rows=130 width=8) - default@bug_201_input_b,t1,Tbl:COMPLETE,Col:NONE,Output:["userid"] + TableScan [TS_0] (rows=130 width=4) + default@bug_201_input_b,t1,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0 - Select Operator [SEL_5] (rows=527 width=8) + Select Operator [SEL_5] (rows=527 width=4) Output:["_col0"] - Filter Operator [FIL_14] (rows=527 width=8) + Filter Operator [FIL_14] (rows=527 width=4) predicate:userid is not null - TableScan [TS_3] (rows=527 width=8) - default@bug_201_input_a,fa,Tbl:COMPLETE,Col:NONE,Output:["userid"] + TableScan [TS_3] (rows=527 width=4) + default@bug_201_input_a,fa,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"] PREHOOK: query: select t1.userid, @@ -291,26 +291,26 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=579 width=8) + Merge Join Operator [MERGEJOIN_15] (rows=123 width=8) Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col0 - Select Operator [SEL_2] (rows=130 width=8) + Select Operator [SEL_2] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_13] (rows=130 width=8) + Filter Operator [FIL_13] (rows=130 width=4) predicate:userid is not null - TableScan [TS_0] (rows=130 width=8) - default@bug_201_input_b,t1,Tbl:COMPLETE,Col:NONE,Output:["userid"] + TableScan [TS_0] (rows=130 width=4) + default@bug_201_input_b,t1,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0 - Select Operator [SEL_5] (rows=527 width=8) + Select Operator [SEL_5] (rows=527 width=4) Output:["_col0"] - Filter Operator [FIL_14] (rows=527 width=8) + Filter Operator [FIL_14] (rows=527 width=4) predicate:userid is not null - TableScan [TS_3] (rows=527 width=8) - default@bug_201_input_a,fa,Tbl:COMPLETE,Col:NONE,Output:["userid"] + TableScan [TS_3] (rows=527 width=4) + default@bug_201_input_a,fa,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"] PREHOOK: query: select t1.userid, diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out index df32ad4ae3..a3c96bbdf2 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out @@ -65,38 +65,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -108,15 +108,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -125,10 +125,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -189,38 +189,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -233,18 +233,18 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -254,11 +254,11 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -266,10 +266,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -345,38 +345,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -389,17 +389,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -407,17 +407,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: llap @@ -426,10 +426,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -497,38 +497,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -540,15 +540,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -557,10 +557,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -632,38 +632,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -675,15 +675,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -692,10 +692,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -791,38 +791,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -834,15 +834,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -851,10 +851,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -938,38 +938,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -981,15 +981,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -998,10 +998,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1063,38 +1063,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1106,15 +1106,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1123,10 +1123,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1184,38 +1184,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1227,15 +1227,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1244,10 +1244,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1301,38 +1301,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1344,15 +1344,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1361,10 +1361,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1428,57 +1428,57 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1492,15 +1492,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1509,10 +1509,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1592,38 +1592,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1635,15 +1635,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1652,10 +1652,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out index df89454387..2a8f6bed46 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out @@ -55,22 +55,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col1 (type: string) auto parallelism: true @@ -85,7 +85,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -108,7 +108,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -135,22 +135,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: true @@ -165,7 +165,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -188,7 +188,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -223,12 +223,12 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 10 TopN Hash Memory Usage: 0.1 @@ -241,16 +241,16 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -369,22 +369,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col2 (type: string) auto parallelism: true @@ -399,7 +399,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -422,7 +422,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -449,22 +449,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col2 (type: string) auto parallelism: true @@ -479,7 +479,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -502,7 +502,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -537,12 +537,12 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 10 TopN Hash Memory Usage: 0.1 @@ -555,16 +555,16 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -631,22 +631,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col2 (type: string) auto parallelism: true @@ -661,7 +661,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -684,7 +684,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -711,22 +711,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col2 (type: string) auto parallelism: true @@ -741,7 +741,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -764,7 +764,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -799,12 +799,12 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 10 TopN Hash Memory Usage: 0.1 @@ -817,16 +817,16 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -893,22 +893,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col1 (type: int) auto parallelism: true @@ -923,7 +923,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -946,7 +946,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -973,22 +973,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: int) auto parallelism: true @@ -1003,7 +1003,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -1026,7 +1026,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -1061,12 +1061,12 @@ STAGE PLANS: 1 _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 10 TopN Hash Memory Usage: 0.1 @@ -1079,16 +1079,16 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out index 183c022a5d..803f40487a 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out @@ -193,133 +193,133 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: d - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan alias: e - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 8 Map Operator Tree: TableScan alias: f - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 9 Map Operator Tree: TableScan alias: g - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -341,15 +341,15 @@ STAGE PLANS: 4 _col0 (type: int) 5 _col0 (type: int) 6 _col0 (type: int) - Statistics: Num rows: 66 Data size: 462 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -358,10 +358,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -440,59 +440,59 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: c - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: d - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: e - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: f - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: g - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -509,15 +509,15 @@ STAGE PLANS: 4 _col0 (type: int) 5 _col0 (type: int) 6 _col0 (type: int) - Statistics: Num rows: 66 Data size: 462 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -527,10 +527,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -611,67 +611,67 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: c - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: d - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: e - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: f - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: g - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: h - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -690,15 +690,15 @@ STAGE PLANS: 5 _col0 (type: int) 6 _col0 (type: int) 7 _col0 (type: int) - Statistics: Num rows: 77 Data size: 539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 357 Data size: 2856 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -708,10 +708,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -822,131 +822,131 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: c - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: d - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: e - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: f - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: g - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: h - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: i - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: j - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: k - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: l - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: m - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: n - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: o - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: p - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -982,76 +982,76 @@ STAGE PLANS: 8 _col0 (type: int) 9 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 165 Data size: 1155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21268 Data size: 1977924 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 165 Data size: 1155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21268 Data size: 1977924 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap Map 18 Map Operator Tree: TableScan alias: q - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 19 Map Operator Tree: TableScan alias: r - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 20 Map Operator Tree: TableScan alias: s - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 21 Map Operator Tree: TableScan alias: t - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1070,10 +1070,10 @@ STAGE PLANS: 3 _col0 (type: int) 4 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 726 Data size: 5082 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 164104 Data size: 15261672 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 726 Data size: 5082 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 164104 Data size: 15261672 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out index e2d5e4e4c2..5fbac774e1 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out @@ -43,6 +43,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -53,14 +54,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -87,6 +88,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -220,6 +228,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -227,25 +236,26 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key = 238) (type: boolean) - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 238 (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -255,15 +265,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -283,6 +329,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238 PREHOOK: type: QUERY @@ -363,6 +416,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -373,14 +427,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -407,6 +461,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out index c3b29ea2bd..bb10c12b4a 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out @@ -43,6 +43,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -53,14 +54,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -87,6 +88,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out index 2c1ee962bc..823ba1cd1e 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out @@ -67,52 +67,54 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -126,15 +128,43 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -152,6 +182,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results + PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key PREHOOK: type: QUERY @@ -1249,52 +1286,54 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1308,15 +1347,43 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1334,6 +1401,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results + PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key PREHOOK: type: QUERY @@ -2447,52 +2521,54 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2506,15 +2582,43 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2532,6 +2636,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results + PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 PREHOOK: type: QUERY @@ -2561,52 +2672,54 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2620,15 +2733,43 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2646,6 +2787,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results + PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 PREHOOK: type: QUERY @@ -2684,19 +2832,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2704,19 +2852,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2724,19 +2872,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2752,10 +2900,10 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out index 562d4440ba..2f353df024 100644 --- a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -29,6 +29,8 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9 Stage-2 depends on stages: Stage-5 Stage-7 depends on stages: Stage-2 Stage-3 depends on stages: Stage-5 @@ -41,11 +43,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Reducer 2 (SIMPLE_EDGE) Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -61,7 +64,7 @@ STAGE PLANS: value expressions: ROW__ID (type: struct) Execution mode: llap LLAP IO: may be used (ACID table) - Map 7 + Map 8 Map Operator Tree: TableScan alias: s @@ -197,6 +200,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -213,6 +236,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -230,6 +281,20 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table + Stage: Stage-2 Move Operator tables: @@ -280,14 +345,16 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -302,7 +369,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: s @@ -354,6 +421,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -371,3 +466,10 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl + diff --git a/ql/src/test/results/clientpositive/llap/stats11.q.out b/ql/src/test/results/clientpositive/llap/stats11.q.out index 48d76cf179..f4087753ef 100644 --- a/ql/src/test/results/clientpositive/llap/stats11.q.out +++ b/ql/src/test/results/clientpositive/llap/stats11.q.out @@ -307,13 +307,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -392,7 +394,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -519,6 +521,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -555,6 +604,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b @@ -673,13 +730,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -758,7 +817,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -864,7 +923,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -885,6 +944,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -898,7 +1004,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -921,6 +1027,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b diff --git a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out index 57aaf557b2..1871db0f4c 100644 --- a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out @@ -73,56 +73,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_null - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -133,56 +89,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_null_part - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out index 3004e36c9d..685360d900 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -970,14 +970,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -1012,10 +1012,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 1f9c9e4474..07b4853613 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -4991,19 +4991,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: i is not null (type: boolean) - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToLong(_col0) (type: bigint) sort order: + Map-reduce partition columns: UDFToLong(_col0) (type: bigint) - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -5035,10 +5035,10 @@ STAGE PLANS: 0 UDFToLong(_col0) (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5199,16 +5199,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToLong(_col0) (type: bigint), _col1 (type: int) sort order: ++ Map-reduce partition columns: UDFToLong(_col0) (type: bigint), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -5216,21 +5216,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tt - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: j is not null (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(i) keys: j (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -5244,10 +5244,10 @@ STAGE PLANS: 0 UDFToLong(_col0) (type: bigint), _col1 (type: int) 1 _col0 (type: bigint), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5260,16 +5260,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out index 29516eff82..65c1c4fe04 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -97,19 +97,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (p_size is not null and p_brand is not null) (type: boolean) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: int) sort order: + Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -117,44 +117,44 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_size is not null (type: boolean) - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_size (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2944 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_brand is not null (type: boolean) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2944 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_brand (type: string) outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2944 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -167,12 +167,12 @@ STAGE PLANS: 0 _col5 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 447 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 447 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -184,10 +184,10 @@ STAGE PLANS: 0 _col3 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 491 Data size: 1969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 491 Data size: 1969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -199,12 +199,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 407 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 407 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -273,19 +273,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_name is not null (type: boolean) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -421,16 +421,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 @@ -550,10 +550,10 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -565,12 +565,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 - Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) Reducer 4 Execution mode: llap @@ -582,17 +582,17 @@ STAGE PLANS: 0 _col3 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13 - Statistics: Num rows: 18 Data size: 2286 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col10 = 0) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean) - Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -604,12 +604,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -617,10 +617,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 9 Execution mode: llap @@ -1144,19 +1144,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_name is not null (type: boolean) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1209,10 +1209,10 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -4091,10 +4091,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 6 Execution mode: llap diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index b4af91579b..4a1ba588f9 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -5686,14 +5686,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -5701,19 +5701,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c1 (type: int) outputColumnNames: c1 - Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(c1) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator keys: c1 (type: int) @@ -5737,12 +5737,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -5754,17 +5754,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 4 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col1 = 0) or (_col4 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean) - Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5776,10 +5776,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 6 Execution mode: llap @@ -5788,16 +5788,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 @@ -5844,16 +5844,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c1 (type: int), c2 (type: char(100)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: char(100)) sort order: + Map-reduce partition columns: _col1 (type: char(100)) - Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6047,19 +6047,19 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: char(100)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: char(100)), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: char(100)) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100)) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -6151,16 +6151,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6168,21 +6168,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: b is not null (type: boolean) - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(a) keys: b (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Filter Operator predicate: b is not null (type: boolean) @@ -6209,12 +6209,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: llap @@ -6226,17 +6226,17 @@ STAGE PLANS: 0 _col0 (type: int), _col1 (type: int) 1 _col1 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1, _col3, _col4, _col7 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6249,12 +6249,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 6 Execution mode: llap @@ -6263,23 +6263,23 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -6370,16 +6370,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: fixob - Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6387,21 +6387,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t7 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: j is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) keys: j (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Filter Operator predicate: j is not null (type: boolean) @@ -6447,17 +6447,17 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col3 (type: int) outputColumnNames: _col0, _col1, _col3, _col4, _col7 - Statistics: Num rows: 4 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6470,12 +6470,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 5 Execution mode: llap @@ -6484,16 +6484,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 6 Execution mode: llap @@ -6505,12 +6505,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: int) sort order: + Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 8 Execution mode: llap @@ -6519,12 +6519,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -6599,16 +6599,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Filter Operator predicate: j is not null (type: boolean) @@ -6669,17 +6669,17 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col3 (type: int) outputColumnNames: _col0, _col1, _col3, _col4, _col7 - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6692,12 +6692,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -6706,16 +6706,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 5 Execution mode: llap @@ -6727,12 +6727,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: int) sort order: + Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 7 Execution mode: llap @@ -6741,12 +6741,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -6788,16 +6788,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Filter Operator predicate: j is not null (type: boolean) @@ -6839,12 +6839,12 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: llap @@ -6856,17 +6856,17 @@ STAGE PLANS: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col3, _col4, _col7 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6879,12 +6879,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 5 Execution mode: llap @@ -6893,19 +6893,19 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -6947,14 +6947,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6962,19 +6962,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: i - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator keys: i (type: int) @@ -6998,12 +6998,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 3 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: llap @@ -7015,17 +7015,17 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col5 - Statistics: Num rows: 3 Data size: 67 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col2 = 0) or (_col5 is null and _col1 is not null and (_col3 >= _col2))) (type: boolean) - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -7037,10 +7037,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 6 Execution mode: llap @@ -7049,16 +7049,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 @@ -7102,14 +7102,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Select Operator expressions: i (type: int) @@ -7146,12 +7146,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -7163,17 +7163,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 3 Data size: 67 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col1 = 0) or (_col4 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean) - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -7185,10 +7185,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 5 Execution mode: llap @@ -7197,16 +7197,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index b78df8b9f5..94a5329d3a 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -112,19 +112,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_size (type: int) outputColumnNames: p_size - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(p_size) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs @@ -160,10 +160,10 @@ STAGE PLANS: aggregations: avg(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double) Stage: Stage-0 @@ -361,20 +361,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_name is null (type: boolean) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Filter Operator predicate: p_name is null (type: boolean) @@ -398,14 +398,14 @@ STAGE PLANS: 1 2 outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 16 Data size: 11084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 498 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 16 Data size: 11084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 11084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -417,15 +417,15 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -500,17 +500,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tnull - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: llap @@ -540,17 +540,17 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 26 Data size: 16250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col10 is null (type: boolean) - Statistics: Num rows: 13 Data size: 8125 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 623 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 8125 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 8125 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -592,13 +592,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Stage: Stage-0 @@ -682,19 +682,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string) outputColumnNames: p_name - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(p_name) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -723,15 +723,15 @@ STAGE PLANS: aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1673,16 +1673,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: double), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 115 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 115 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double), _col1 (type: boolean) Stage: Stage-0 @@ -1765,19 +1765,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_size (type: int) outputColumnNames: p_size - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(p_size) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -1847,10 +1847,10 @@ STAGE PLANS: aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Stage: Stage-0 @@ -2030,16 +2030,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 107 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: boolean) Reducer 7 Execution mode: llap @@ -2048,10 +2048,10 @@ STAGE PLANS: aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double) Stage: Stage-0 @@ -3645,14 +3645,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -3690,21 +3690,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pp - Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2992 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_type is not null (type: boolean) - Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2992 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(p_name) keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -3812,16 +3812,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: boolean) Stage: Stage-0 @@ -4446,16 +4446,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col4 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col4 (type: string) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -4817,16 +4817,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Stage: Stage-0 @@ -4939,16 +4939,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), true (type: boolean), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: boolean) Stage: Stage-0 @@ -5117,16 +5117,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Stage: Stage-0 @@ -5284,16 +5284,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Reducer 7 Execution mode: llap @@ -5302,10 +5302,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index 202980e975..abda9b5183 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -2178,19 +2178,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tnull - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: i - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator keys: i (type: int) @@ -2214,12 +2214,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 546 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 546 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -2231,14 +2231,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 28 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (null) ELSE (false) END (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2250,10 +2250,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 6 Execution mode: llap @@ -2262,16 +2262,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/tez_bmj_schema_evolution.q.out b/ql/src/test/results/clientpositive/llap/tez_bmj_schema_evolution.q.out index c83da8b123..bda0eb5510 100644 --- a/ql/src/test/results/clientpositive/llap/tez_bmj_schema_evolution.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_bmj_schema_evolution.q.out @@ -87,14 +87,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test - Statistics: Num rows: 1000 Data size: 60312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 60312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 60312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -104,11 +104,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 1100 Data size: 66343 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2551 Data size: 242345 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1100 Data size: 66343 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2551 Data size: 242345 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: some inputs @@ -116,19 +116,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -137,10 +137,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1100 Data size: 66343 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2551 Data size: 242345 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 66343 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2551 Data size: 242345 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_dml.q.out b/ql/src/test/results/clientpositive/llap/tez_dml.q.out index 786929e7af..6e6ba271f5 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dml.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dml.q.out @@ -438,11 +438,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -461,8 +465,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmp_src_part + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: c, d + Statistics: Num rows: 309 Data size: 2718 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c, 16) + keys: d (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 309 Data size: 2718 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 309 Data size: 2718 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 154 Data size: 1354 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 154 Data size: 1354 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 154 Data size: 1354 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -482,6 +522,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c + Column Types: string + Table: default.tmp_src_part + PREHOOK: query: INSERT INTO TABLE tmp_src_part PARTITION (d) SELECT * FROM tmp_src PREHOOK: type: QUERY PREHOOK: Input: default@tmp_src @@ -857,6 +904,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -864,6 +913,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -885,6 +938,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.even + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c, d + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c, 16), compute_stats(d, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key % 2) = 1) (type: boolean) Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE @@ -900,8 +966,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.odd + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c, d + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c, 16), compute_stats(d, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -919,6 +1028,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c, d + Column Types: int, string + Table: default.even + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c, d + Column Types: int, string + Table: default.odd + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/tez_fsstat.q.out b/ql/src/test/results/clientpositive/llap/tez_fsstat.q.out index 133f50c8db..802140cfd2 100644 --- a/ql/src/test/results/clientpositive/llap/tez_fsstat.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_fsstat.q.out @@ -83,7 +83,7 @@ Database: default Table: tab_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out b/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out index 92a188e18a..f436baab08 100644 --- a/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out @@ -58,19 +58,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_src - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -82,15 +82,15 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1219 Data size: 9752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -99,10 +99,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_nway_join.q.out b/ql/src/test/results/clientpositive/llap/tez_nway_join.q.out index 86f040234b..a3184a7768 100644 --- a/ql/src/test/results/clientpositive/llap/tez_nway_join.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_nway_join.q.out @@ -49,14 +49,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -68,15 +68,15 @@ STAGE PLANS: input vertices: 1 Map 3 2 Map 4 - Statistics: Num rows: 101 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15525 Data size: 124200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -84,38 +84,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -125,10 +125,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -161,14 +161,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -178,7 +178,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 690 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -187,15 +187,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 4 - Statistics: Num rows: 55 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15525 Data size: 124200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -203,38 +203,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -244,10 +244,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -302,48 +302,48 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 5 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -357,15 +357,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 101 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15525 Data size: 124200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -374,10 +374,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -410,11 +410,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -424,7 +424,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 690 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -433,15 +433,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 4 - Statistics: Num rows: 55 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15525 Data size: 124200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -449,32 +449,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -484,10 +484,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_self_join.q.out b/ql/src/test/results/clientpositive/llap/tez_self_join.q.out index 8ba9761d76..8d89ef820a 100644 --- a/ql/src/test/results/clientpositive/llap/tez_self_join.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_self_join.q.out @@ -38,7 +38,7 @@ POSTHOOK: query: insert into table tez_self_join2 values(1),(2),(3) POSTHOOK: type: QUERY POSTHOOK: Output: default@tez_self_join2 POSTHOOK: Lineage: tez_self_join2.id1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select s.id2, s.id3 from @@ -70,60 +70,60 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: self1 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + alias: tez_self_join2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((id2 = 'ab') and id1 is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + predicate: id1 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id1 (type: int), id3 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + expressions: id1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan - alias: self2 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + alias: self1 + Statistics: Num rows: 3 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ('ab' = id3) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + predicate: ((id2 = 'ab') and id1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + expressions: id1 (type: int), id3 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan - alias: tez_self_join2 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + alias: self2 + Statistics: Num rows: 3 Data size: 258 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: id1 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + predicate: ('ab' = id3) (type: boolean) + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + sort order: + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -133,42 +133,42 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'ab' (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: 0 1 outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 'ab' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -176,7 +176,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select s.id2, s.id3 from ( diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out b/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out index 2fcf0fc71c..78803f6aeb 100644 --- a/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out @@ -123,49 +123,61 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Map 1 <- Map 3 (CUSTOM_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: s3 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan alias: s1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + input vertices: + 1 Map 3 + Statistics: Num rows: 500 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: s3 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -173,10 +185,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -222,37 +234,37 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vt1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -264,15 +276,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -281,10 +293,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -295,12 +307,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -367,37 +379,37 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: vt1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -406,12 +418,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -421,15 +433,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: llap @@ -438,10 +450,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -513,36 +525,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 4 @@ -550,28 +562,28 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 5 Execution mode: llap @@ -580,10 +592,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -659,36 +671,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -697,12 +709,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -712,15 +724,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: llap @@ -729,10 +741,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -743,12 +755,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out b/ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out index cd392a7b2b..ccdd5f0f02 100644 --- a/ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out @@ -149,21 +149,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 266 Data size: 1064 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -236,11 +236,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 242 Data size: 47094 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 242 Data size: 44528 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -248,10 +248,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 242 Data size: 139392 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 137214 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 242 Data size: 139392 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 137214 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -540,14 +540,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s2 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: s3 @@ -562,14 +562,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -578,7 +578,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 532 Data size: 9904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 532 Data size: 2129 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -657,25 +657,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s3 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -684,7 +684,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 532 Data size: 9904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 532 Data size: 2129 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -841,14 +841,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s3 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: s1 @@ -866,7 +866,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 266 Data size: 1064 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out b/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out index 66d7aeca70..8cc35d0ead 100644 --- a/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out @@ -263,33 +263,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -298,15 +298,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 0 Map 1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -317,10 +317,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -374,33 +374,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -409,15 +409,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 0 Map 1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -428,10 +428,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -584,11 +584,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -600,44 +601,23 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Execution mode: llap + LLAP IO: no inputs Map 4 Map Operator Tree: TableScan @@ -657,6 +637,44 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1364,8 +1382,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (CUSTOM_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (CUSTOM_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1380,15 +1399,43 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 2 + Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -1406,50 +1453,21 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1 input vertices: 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1638,14 +1656,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1655,7 +1673,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1664,15 +1682,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 6 - Statistics: Num rows: 558 Data size: 10399 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1892 Data size: 15136 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1680,33 +1698,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s3 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: s2 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1715,15 +1733,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 6 - Statistics: Num rows: 558 Data size: 10399 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1892 Data size: 15136 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1731,24 +1749,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 3 @@ -1758,10 +1776,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1811,36 +1829,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1849,19 +1867,19 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1870,15 +1888,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 0 Reducer 2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 5 Execution mode: llap @@ -1887,10 +1905,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out index 2ca78d7af8..f919b6a4b9 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out @@ -47,6 +47,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -54,7 +55,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -78,9 +80,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.partunion1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: id1, part1 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(id1, 16) + keys: part1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: dummy @@ -101,8 +119,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.partunion1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: id1, part1 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(id1, 16) + keys: part1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -124,6 +178,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: id1 + Column Types: int + Table: default.partunion1 + PREHOOK: query: insert into table partunion1 partition(part1) select temps.* from ( select 1 as id1, '2014' as part1 from dummy diff --git a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out index 14e8e4389f..c7739e4f6c 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out @@ -45,6 +45,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -53,11 +55,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Union 3 (CONTAINS) - Map 7 <- Union 3 (CONTAINS) + Map 8 <- Union 3 (CONTAINS) + Map 9 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) - Reducer 5 <- Union 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Union 3 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -78,7 +82,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -111,7 +115,7 @@ STAGE PLANS: Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 7 + Map 9 Map Operator Tree: TableScan alias: s0 @@ -199,10 +203,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -220,6 +252,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -239,6 +299,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -952,6 +1026,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -961,10 +1037,12 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 7 <- Union 2 (CONTAINS) + Map 9 <- Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 4 <- Union 2 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Union 2 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -1000,7 +1078,7 @@ STAGE PLANS: Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: s1 @@ -1018,7 +1096,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 9 Map Operator Tree: TableScan alias: s2 @@ -1072,10 +1150,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -1093,10 +1199,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 @@ -1146,6 +1280,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -1855,6 +2003,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1864,10 +2014,12 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 7 <- Union 2 (CONTAINS) + Map 9 <- Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 4 <- Union 2 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Union 2 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -1903,7 +2055,7 @@ STAGE PLANS: Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: s1 @@ -1921,7 +2073,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 9 Map Operator Tree: TableScan alias: s2 @@ -1975,10 +2127,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -1996,10 +2176,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 @@ -2049,6 +2257,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -2750,6 +2972,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -2758,10 +2982,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Union 3 (CONTAINS) + Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) - Reducer 5 <- Union 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Union 3 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2782,7 +3008,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -2870,10 +3096,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -2891,6 +3145,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -2910,6 +3192,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -3605,6 +3901,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -3613,10 +3911,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Union 3 (CONTAINS) + Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3637,7 +3937,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -3717,6 +4017,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -3738,6 +4051,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 34000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -3757,6 +4113,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/union4.q.out b/ql/src/test/results/clientpositive/llap/union4.q.out index b9ca42da29..9429f1d0f2 100644 --- a/ql/src/test/results/clientpositive/llap/union4.q.out +++ b/ql/src/test/results/clientpositive/llap/union4.q.out @@ -23,6 +23,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -30,7 +31,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -51,7 +53,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: s2 @@ -93,7 +95,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -117,6 +147,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Union 3 Vertex: Union 3 @@ -136,6 +179,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/llap/union6.q.out b/ql/src/test/results/clientpositive/llap/union6.q.out index dca14c191e..10c6de714f 100644 --- a/ql/src/test/results/clientpositive/llap/union6.q.out +++ b/ql/src/test/results/clientpositive/llap/union6.q.out @@ -23,14 +23,16 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 4 <- Union 3 (CONTAINS) + Map 5 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -51,7 +53,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: s2 @@ -68,6 +70,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -90,6 +105,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -109,6 +152,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index b4b601993b..5e6fab4ead 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -488,17 +488,19 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -555,7 +557,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 10 + Reducer 11 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -14630,6 +14632,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION DISTINCT @@ -15194,7 +15203,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -15670,7 +15679,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 8 + Map 9 Map Operator Tree: TableScan alias: s3 @@ -15691,6 +15700,25 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -15732,6 +15760,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out b/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out index b13c0ee464..8b99d17440 100644 --- a/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out @@ -294,7 +294,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} numFiles 4 numRows 20 rawDataSize 4552 @@ -627,7 +627,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} numFiles 2 numRows 20 rawDataSize 4389 diff --git a/ql/src/test/results/clientpositive/llap/union_remove_26.q.out b/ql/src/test/results/clientpositive/llap/union_remove_26.q.out index 67fef54a33..583717a1e5 100644 --- a/ql/src/test/results/clientpositive/llap/union_remove_26.q.out +++ b/ql/src/test/results/clientpositive/llap/union_remove_26.q.out @@ -135,128 +135,12 @@ POSTHOOK: query: explain SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: inputtbl1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: val (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1), min(_col1), max(_col1) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: inputtbl2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: val (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1), min(_col1), max(_col1) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: inputtbl3 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: val (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1), min(_col1), max(_col1) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-1 + Stage: Stage-0 Fetch Operator - limit: -1 + limit: 3 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/llap/union_stats.q.out b/ql/src/test/results/clientpositive/llap/union_stats.q.out index 1f3dc82e15..69b25a9c06 100644 --- a/ql/src/test/results/clientpositive/llap/union_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/union_stats.q.out @@ -425,7 +425,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1000 rawDataSize 10624 @@ -460,7 +460,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1000 rawDataSize 10624 diff --git a/ql/src/test/results/clientpositive/llap/union_top_level.q.out b/ql/src/test/results/clientpositive/llap/union_top_level.q.out index 2fac8ccf0c..245c257432 100644 --- a/ql/src/test/results/clientpositive/llap/union_top_level.q.out +++ b/ql/src/test/results/clientpositive/llap/union_top_level.q.out @@ -650,6 +650,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -657,8 +658,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -679,11 +681,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -701,11 +702,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: src @@ -723,7 +723,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -749,7 +748,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -771,7 +798,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 7 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 8 Execution mode: llap Reduce Operator Tree: Select Operator @@ -793,6 +833,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Union 3 Vertex: Union 3 @@ -812,6 +865,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.union_top + PREHOOK: query: insert into table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a union all @@ -870,6 +930,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -877,8 +938,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -899,11 +961,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -921,11 +982,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: src @@ -943,7 +1003,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -969,7 +1028,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -991,7 +1078,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 7 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 8 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1013,6 +1113,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Union 3 Vertex: Union 3 @@ -1032,6 +1145,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.union_top + PREHOOK: query: insert overwrite table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a union all diff --git a/ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out b/ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out index a2ce36564e..dae532e97c 100644 --- a/ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out @@ -125,17 +125,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -203,17 +203,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -281,17 +281,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -359,17 +359,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -437,7 +437,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3] @@ -449,19 +449,19 @@ STAGE PLANS: native: true projectedOutputColumns: [4, 5, 8] selectExpressions: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 4:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 5:string, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 6:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 7:string) -> 8:boolean - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -531,7 +531,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3] @@ -543,19 +543,19 @@ STAGE PLANS: native: true projectedOutputColumns: [4, 5, 8] selectExpressions: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 4:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 5:string, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 6:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 7:string) -> 8:boolean - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -615,14 +615,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: power(key, 2) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -719,17 +719,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key = 10) (type: boolean) - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -787,14 +787,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: power(key, 2) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -891,17 +891,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key = 10) (type: boolean) - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -964,22 +964,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_case_groupby - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -1011,13 +1011,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1068,7 +1068,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_case_groupby - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1080,7 +1080,7 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 3] selectExpressions: VectorUDFAdaptor(CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END)(children: NotCol(col 1) -> 2:boolean) -> 3:int - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) Group By Vectorization: @@ -1093,7 +1093,7 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -1136,13 +1136,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index a98c34f823..a92b17cb6e 100644 --- a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -68,32 +68,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -102,10 +102,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -128,13 +128,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -202,25 +202,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -228,7 +228,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: vectorOutput: false @@ -237,12 +237,12 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reducer 2 Execution mode: vectorized, llap @@ -264,13 +264,13 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumns: [] - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -310,13 +310,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -409,25 +409,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -435,7 +435,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -445,37 +445,37 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -483,7 +483,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -493,12 +493,12 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Reducer 2 @@ -523,7 +523,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -544,14 +544,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -578,7 +578,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -674,32 +674,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -708,10 +708,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -734,13 +734,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -815,32 +815,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -849,10 +849,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -875,13 +875,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -980,32 +980,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -1014,10 +1014,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1040,13 +1040,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1133,32 +1133,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -1167,10 +1167,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1193,13 +1193,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1265,7 +1265,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1275,7 +1275,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 @@ -1284,7 +1284,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2] selectExpressions: LongColAddLongScalar(col 0, val 1) -> 2:long - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1308,7 +1308,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1318,7 +1318,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 @@ -1327,7 +1327,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2] selectExpressions: LongColAddLongScalar(col 0, val 1) -> 2:long - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1356,7 +1356,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -1365,10 +1365,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1390,13 +1390,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1457,32 +1457,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -1491,10 +1491,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1517,13 +1517,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1590,36 +1590,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1628,7 +1628,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 6 Data size: 613 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -1637,10 +1637,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1663,13 +1663,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1752,32 +1752,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -1786,10 +1786,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1812,13 +1812,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1900,6 +1900,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1907,30 +1909,34 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1938,32 +1944,106 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Execution mode: llap + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1981,6 +2061,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -2118,6 +2212,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -2126,32 +2222,34 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -2159,19 +2257,36 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -2181,56 +2296,93 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 1) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0 - native: false - projectedOutputColumns: [0] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2248,6 +2400,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index 160a43b33f..33f1d219c2 100644 --- a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -127,7 +127,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -137,7 +137,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 10) -> boolean predicate: bin is not null (type: boolean) - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -145,7 +145,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -168,7 +168,7 @@ STAGE PLANS: native: true projectedOutputColumns: [21] selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21)) -> 21:int - Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10000 Data size: 6820000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) Group By Vectorization: @@ -202,7 +202,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -212,7 +212,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 10) -> boolean predicate: bin is not null (type: boolean) - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -220,7 +220,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col10 (type: binary) sort order: + @@ -351,7 +351,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: hundredorc - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -362,7 +362,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [10] - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -375,7 +375,7 @@ STAGE PLANS: keys: bin (type: binary) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: binary) sort order: + @@ -418,7 +418,7 @@ STAGE PLANS: keys: KEY._col0 (type: binary) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col0 (type: binary) outputColumnNames: _col0, _col1 @@ -539,7 +539,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -549,7 +549,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: i is not null (type: boolean) - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), bin (type: binary) outputColumnNames: _col0, _col1 @@ -557,7 +557,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 10] - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -571,7 +571,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144 Data size: 16704 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary) outputColumnNames: _col0, _col1, _col2 @@ -579,13 +579,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 10, 11] - Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144 Data size: 16704 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144 Data size: 16704 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -604,7 +604,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -614,7 +614,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: i is not null (type: boolean) - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), bin (type: binary) outputColumnNames: _col0, _col1 @@ -622,7 +622,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 10] - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + diff --git a/ql/src/test/results/clientpositive/llap/vector_bround.q.out b/ql/src/test/results/clientpositive/llap/vector_bround.q.out index d463f1a6e7..d6bdc31ece 100644 --- a/ql/src/test/results/clientpositive/llap/vector_bround.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_bround.q.out @@ -47,7 +47,7 @@ Stage-0 Select Operator [SEL_3] (rows=8 width=16) Output:["_col0","_col1"] TableScan [TS_0] (rows=8 width=16) - default@test_vector_bround,test_vector_bround,Tbl:COMPLETE,Col:NONE,Output:["v0","v1"] + default@test_vector_bround,test_vector_bround,Tbl:COMPLETE,Col:COMPLETE,Output:["v0","v1"] PREHOOK: query: select bround(v0), bround(v1, 1) from test_vector_bround PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_bucket.q.out b/ql/src/test/results/clientpositive/llap/vector_bucket.q.out index 6dd0cfb8b0..ae949560c1 100644 --- a/ql/src/test/results/clientpositive/llap/vector_bucket.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_bucket.q.out @@ -21,6 +21,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -28,6 +29,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -66,35 +68,58 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [2, 1] - selectExpressions: CastStringToLong(col 0) -> 2:int Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.non_orc_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -112,6 +137,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, string + Table: default.non_orc_table + PREHOOK: query: select a, b from non_orc_table order by a PREHOOK: type: QUERY PREHOOK: Input: default@non_orc_table diff --git a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index f06d49a32b..e5a5c7d120 100644 --- a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -130,7 +130,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -141,7 +141,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(50), avg(50.0), avg(50) Group By Vectorization: @@ -155,12 +155,12 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 133 Data size: 59584 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 133 Data size: 59584 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Execution mode: vectorized, llap @@ -186,11 +186,11 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 133 Data size: 17556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 133 Data size: 17556 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) Reducer 3 @@ -210,19 +210,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3] - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 133 Data size: 17556 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_char_2.q.out b/ql/src/test/results/clientpositive/llap/vector_char_2.q.out index 65fafb0ad4..d304c9228d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_char_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_char_2.q.out @@ -80,7 +80,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -92,7 +92,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 2] selectExpressions: CastStringToLong(col 0) -> 2:int - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), count() Group By Vectorization: @@ -105,7 +105,7 @@ STAGE PLANS: keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: + @@ -149,7 +149,7 @@ STAGE PLANS: keys: KEY._col0 (type: char(20)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: + @@ -177,19 +177,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 535 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 535 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -276,7 +276,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -288,7 +288,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 2] selectExpressions: CastStringToLong(col 0) -> 2:int - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), count() Group By Vectorization: @@ -301,7 +301,7 @@ STAGE PLANS: keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: - @@ -345,7 +345,7 @@ STAGE PLANS: keys: KEY._col0 (type: char(20)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: - @@ -373,19 +373,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 535 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 535 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_char_4.q.out b/ql/src/test/results/clientpositive/llap/vector_char_4.q.out index d164ebef3c..4f91cf0ce8 100644 --- a/ql/src/test/results/clientpositive/llap/vector_char_4.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_char_4.q.out @@ -136,50 +136,74 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar - Execution mode: vectorized, llap + Select Operator + expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: char(20)), _col3 (type: char(30)), _col4 (type: char(20)), _col5 (type: char(20)), _col6 (type: char(50)) + outputColumnNames: ct, csi, ci, cb, cf, cd, cs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ct, 16), compute_stats(csi, 16), compute_stats(ci, 16), compute_stats(cb, 16), compute_stats(cf, 16), compute_stats(cd, 16), compute_stats(cs, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -197,3 +221,10 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ct, csi, ci, cb, cf, cd, cs + Column Types: char(10), char(10), char(20), char(30), char(20), char(20), char(50) + Table: default.char_lazy_binary_columnar + diff --git a/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out b/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out index 47c709f559..9fcf9a0d7f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out @@ -233,12 +233,14 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: @@ -294,6 +296,8 @@ STAGE PLANS: Stage: Stage-3 + Stage: Stage-4 + PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index d57d39f335..54062bf7e0 100644 --- a/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -46,11 +46,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) Group By Vectorization: @@ -60,12 +60,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -81,14 +81,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -141,14 +141,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: COALESCE(str1,0) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -208,7 +208,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -220,7 +220,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 4] selectExpressions: CastStringToLong(col 3)(children: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string) -> 4:int - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) Group By Vectorization: @@ -233,7 +233,7 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -276,7 +276,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double) outputColumnNames: _col0, _col1 @@ -285,13 +285,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 2] selectExpressions: RoundWithNumDigitsDoubleToDouble(col 3, decimalPlaces 2)(children: DoubleColDivideDoubleScalar(col 2, val 60.0)(children: CastLongToDouble(col 1) -> 2:double) -> 3:double) -> 2:double - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -344,7 +344,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -356,13 +356,13 @@ STAGE PLANS: native: true projectedOutputColumns: [3] selectExpressions: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out index 85ddc7cc8d..110869fddf 100644 --- a/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out @@ -53,11 +53,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: m - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: member (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -67,14 +67,14 @@ STAGE PLANS: outputColumnNames: _col0, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -85,16 +85,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: n - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: member (type: bigint), attr (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index 5ea4b0f639..f0e1b7f8b4 100644 --- a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -183,10 +183,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2b - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 diff --git a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out index b9d0f06c3f..1457bc0978 100644 --- a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out @@ -1252,7 +1252,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: web_sales - Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] @@ -1263,7 +1263,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [16] - Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -1274,7 +1274,7 @@ STAGE PLANS: keys: ws_order_number (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 90 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1314,7 +1314,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 1760000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 90 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0) Group By Vectorization: @@ -1354,13 +1354,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_data_types.q.out b/ql/src/test/results/clientpositive/llap/vector_data_types.q.out index b9bb0a20aa..0bd4bf6d63 100644 --- a/ql/src/test/results/clientpositive/llap/vector_data_types.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_data_types.q.out @@ -119,15 +119,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: llap @@ -138,13 +138,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -219,7 +219,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -230,7 +230,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ @@ -268,19 +268,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out index b3ff557bf4..c9e371ad3a 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out @@ -54,15 +54,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToBoolean(t) (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -71,10 +71,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -117,15 +117,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToByte(t) (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -134,10 +134,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -180,15 +180,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToShort(t) (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -197,10 +197,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -243,15 +243,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -260,10 +260,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -306,15 +306,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToLong(t) (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -323,10 +323,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -369,15 +369,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToFloat(t) (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -386,10 +386,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -432,15 +432,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToDouble(t) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -449,10 +449,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -495,15 +495,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToString(t) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -512,10 +512,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -558,15 +558,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( t AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -575,10 +575,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out index 947ac81001..a894e21228 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out @@ -43,15 +43,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToBoolean(t) (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -60,10 +60,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -106,15 +106,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToByte(t) (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -123,10 +123,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -169,15 +169,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToShort(t) (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -186,10 +186,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -232,15 +232,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -249,10 +249,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -295,15 +295,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToLong(t) (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -312,10 +312,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -358,15 +358,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToFloat(t) (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -375,10 +375,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -421,15 +421,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToDouble(t) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -438,10 +438,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -484,15 +484,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToString(t) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -501,10 +501,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -558,15 +558,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToBoolean(t) (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -575,10 +575,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -621,15 +621,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToByte(t) (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -638,10 +638,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -684,15 +684,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToShort(t) (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -701,10 +701,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -747,15 +747,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -764,10 +764,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -810,15 +810,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToLong(t) (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -827,10 +827,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -873,15 +873,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToFloat(t) (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -890,10 +890,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -936,15 +936,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToDouble(t) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -953,10 +953,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -999,15 +999,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToString(t) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1016,10 +1016,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out index c62e25a0e5..2a89e3b562 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out @@ -98,7 +98,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -108,7 +108,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: dec is not null (type: boolean) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: dec (type: decimal(4,2)) outputColumnNames: _col0 @@ -116,7 +116,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -132,13 +132,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1126 Data size: 252224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1126 Data size: 252224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -157,7 +157,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -167,7 +167,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: dec is not null (type: boolean) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: dec (type: decimal(4,0)) outputColumnNames: _col0 @@ -175,7 +175,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(6,2)) sort order: + diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index 5d6208608f..6df5053681 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -569,7 +569,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_precision - Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 3584 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -580,7 +580,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 3584 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(dec), sum(dec) Group By Vectorization: @@ -592,10 +592,10 @@ STAGE PLANS: vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -619,10 +619,10 @@ STAGE PLANS: aggregations: avg(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out index b6175646d3..fe79ef634d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out @@ -54,7 +54,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_txt - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -66,7 +66,7 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 1:decimal(11,0) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + @@ -103,13 +103,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -156,7 +156,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_txt - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -205,13 +205,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 0] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -284,15 +284,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_rc - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: llap LLAP IO: no inputs @@ -317,13 +317,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -403,13 +403,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 0] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -482,7 +482,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -494,7 +494,7 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 1:decimal(11,0) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + @@ -531,13 +531,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -584,7 +584,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -633,13 +633,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 0] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out index a3bf091fc1..971984483b 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out @@ -58,7 +58,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_1_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -70,7 +70,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 3:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 4:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 5:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 13:decimal(21,0) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + @@ -107,13 +107,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -209,7 +209,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_2_orc - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -221,7 +221,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 4:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 5:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 6:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 7:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 11:decimal(21,0), FuncRoundDecimalToDecimal(col 1) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 0) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 1) -> 14:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 2) -> 15:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 3) -> 16:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 4) -> 17:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -1) -> 18:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -2) -> 19:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -3) -> 20:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -4) -> 21:decimal(21,0) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + @@ -258,13 +258,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -387,7 +387,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_3_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -399,7 +399,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -15) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -16) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -13) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -14) -> 4:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -11) -> 5:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -12) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -9) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -10) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 14:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 15:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 16:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 17:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 18:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 19:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 20:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 21:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 5) -> 22:decimal(26,5), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 6) -> 23:decimal(27,6), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 7) -> 24:decimal(28,7), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 8) -> 25:decimal(29,8), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 26:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 10) -> 27:decimal(31,10), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 11) -> 28:decimal(32,11), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 12) -> 29:decimal(33,12), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 13) -> 30:decimal(34,13), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 14) -> 31:decimal(35,14), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 15) -> 32:decimal(36,15), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 16) -> 33:decimal(37,16) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + @@ -436,13 +436,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -554,7 +554,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_4_orc - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -566,7 +566,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 2:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 9) -> 3:decimal(30,9) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(30,9)) sort order: + @@ -604,13 +604,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 2, 3] selectExpressions: ConstantVectorExpression(val 1809242.315111134) -> 2:decimal(17,9), ConstantVectorExpression(val -1809242.315111134) -> 3:decimal(17,9) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index c271b8230c..e48347ad68 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -65,14 +65,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + key) (type: decimal(21,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -149,14 +149,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + CAST( value AS decimal(10,0))) (type: decimal(21,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -233,14 +233,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) + (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -317,14 +317,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) + 1.0) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -401,14 +401,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key - key) (type: decimal(21,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -485,14 +485,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key - CAST( value AS decimal(10,0))) (type: decimal(21,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -569,14 +569,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) - (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -653,14 +653,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) - 1.0) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -737,14 +737,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key * key) (type: decimal(38,17)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -821,17 +821,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key * CAST( value AS decimal(10,0))) > 0) (type: boolean) - Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -893,14 +893,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key * CAST( value AS decimal(10,0))) (type: decimal(31,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -977,14 +977,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) * (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1061,14 +1061,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) * 2.0) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1145,17 +1145,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key / 0) (type: decimal(22,12)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1195,17 +1195,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) / null) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1245,17 +1245,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key / key) (type: decimal(38,18)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1328,17 +1328,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (value <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key / CAST( value AS decimal(10,0))) (type: decimal(31,21)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1401,17 +1401,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (value <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) / (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1474,14 +1474,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (1.0 + (UDFToDouble(key) / 2.0)) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1558,14 +1558,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: abs(key) (type: decimal(20,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1646,22 +1646,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(key), count(key), avg(key) keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 5768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 5768 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1673,15 +1673,15 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 3304 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), _col3 (type: decimal(24,14)), _col1 (type: decimal(30,10)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4760 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4760 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10)) Reducer 3 Execution mode: vectorized, llap @@ -1689,10 +1689,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(24,14)), VALUE._col2 (type: decimal(30,10)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4760 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4760 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1746,14 +1746,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (- key) (type: decimal(20,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1895,14 +1895,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ceil(key) (type: decimal(11,0)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1979,14 +1979,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: floor(key) (type: decimal(11,0)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2063,14 +2063,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: round(key, 2) (type: decimal(13,2)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2147,14 +2147,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: power(key, 2) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2231,14 +2231,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ((key + 1) % (key / 2)) (type: decimal(22,12)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2318,22 +2318,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: stddev(key), variance(key) keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2296 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2296 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2345,10 +2345,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2405,22 +2405,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: stddev_samp(key), var_samp(key) keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2296 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2296 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2432,10 +2432,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2492,19 +2492,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: histogram_numeric(_col0, 3) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: array) Execution mode: llap LLAP IO: all inputs @@ -2515,10 +2515,10 @@ STAGE PLANS: aggregations: histogram_numeric(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2559,19 +2559,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)) outputColumnNames: key - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(key) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2582,10 +2582,10 @@ STAGE PLANS: aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2626,19 +2626,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)) outputColumnNames: key - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(key) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2649,10 +2649,10 @@ STAGE PLANS: aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2693,19 +2693,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)) outputColumnNames: key - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(key) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2716,10 +2716,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out index e00de78470..da4939eef9 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out @@ -73,7 +73,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf2 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -83,7 +83,7 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean predicate: (key = 10) (type: boolean) - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: NaN (type: double), NaN (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -92,13 +92,13 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8] selectExpressions: ConstantVectorExpression(val NaN) -> 2:double, ConstantVectorExpression(val NaN) -> 3:double, ConstantVectorExpression(val 1.4711276743037347) -> 4:double, ConstantVectorExpression(val -0.8390715290764524) -> 5:double, ConstantVectorExpression(val -0.5440211108893698) -> 6:double, ConstantVectorExpression(val 0.6483608274590866) -> 7:double, ConstantVectorExpression(val 0.17453292519943295) -> 8:double - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -162,7 +162,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf2 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -172,7 +172,7 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean predicate: (key = 10) (type: boolean) - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -181,13 +181,13 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9] selectExpressions: ConstantVectorExpression(val 22026.465794806718) -> 2:double, ConstantVectorExpression(val 2.302585092994046) -> 3:double, ConstantVectorExpression(val 2.302585092994046) -> 4:double, ConstantVectorExpression(val 1.0) -> 5:double, FuncLogWithBaseLongToDouble(col 1) -> 6:double, VectorUDFAdaptor(log(value, 10)) -> 7:double, ConstantVectorExpression(val 1.0) -> 8:double, ConstantVectorExpression(val 3.1622776601683795) -> 9:double - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out index ffeab2c132..5712c41230 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out @@ -35,6 +35,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -43,6 +44,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -143,6 +146,60 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 4 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -160,6 +217,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out index 5bfa9b5e8a..43f46c997a 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out @@ -35,6 +35,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -43,6 +44,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -143,6 +146,60 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 4 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -160,6 +217,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 2fa1efe518..9c04e2de8e 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -594,6 +594,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -604,8 +606,10 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -684,10 +688,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials @@ -699,7 +731,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -721,6 +753,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -738,6 +798,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out index 4deef94c54..83e2f6e80d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -381,22 +381,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: string), b (type: string), (c + d) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -408,12 +408,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -423,15 +423,15 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2184 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out index 678db83a9f..4eda3a7f3e 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -50,22 +50,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: category (type: int), live (type: int), comments (type: int) outputColumnNames: category, live, comments - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(live), max(comments) keys: category (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -77,16 +77,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true Filter Operator predicate: (_col3 > 0) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) Reducer 3 Execution mode: llap @@ -94,7 +94,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int) outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -115,14 +115,14 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index 39e81f3789..23a05e3747 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -250,7 +250,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] @@ -261,7 +261,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [9] - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -272,7 +272,7 @@ STAGE PLANS: keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -449,7 +449,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] @@ -460,7 +460,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [9] - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -471,7 +471,7 @@ STAGE PLANS: keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -511,7 +511,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col0) Group By Vectorization: @@ -524,7 +524,7 @@ STAGE PLANS: keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 @@ -532,7 +532,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1] - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -558,13 +558,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -729,7 +729,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 11816 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] @@ -740,7 +740,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 9, 10] - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 11816 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ss_quantity) Group By Vectorization: @@ -753,7 +753,7 @@ STAGE PLANS: keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ @@ -796,7 +796,7 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -804,7 +804,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 0, 2] - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col2) Group By Vectorization: @@ -817,7 +817,7 @@ STAGE PLANS: keys: _col1 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -844,13 +844,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1015,7 +1015,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 11816 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] @@ -1026,7 +1026,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 9, 10] - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 11816 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ss_quantity) Group By Vectorization: @@ -1039,7 +1039,7 @@ STAGE PLANS: keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ @@ -1082,7 +1082,7 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -1090,7 +1090,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 0, 2] - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2) Group By Vectorization: @@ -1103,7 +1103,7 @@ STAGE PLANS: keys: _col1 (type: int), _col0 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ @@ -1130,13 +1130,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out index e737f0be6e..dd144b4888 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out @@ -431,6 +431,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -441,8 +443,10 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -500,7 +504,7 @@ STAGE PLANS: Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -521,7 +525,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -536,8 +568,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 5 - Execution mode: vectorized, llap + Reducer 6 + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -558,6 +590,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -575,6 +635,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index 73468110b0..21f1a0834c 100644 --- a/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -54,7 +54,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -64,7 +64,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int) outputColumnNames: _col0 @@ -72,7 +72,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -89,7 +89,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 @@ -97,13 +97,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -127,7 +127,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -137,7 +137,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: _col0 @@ -145,7 +145,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -216,7 +216,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -226,7 +226,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int) outputColumnNames: _col0 @@ -234,7 +234,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Semi Join 0 to 1 @@ -252,13 +252,13 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -282,7 +282,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -292,7 +292,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: _col0 @@ -300,7 +300,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -311,7 +311,7 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -414,7 +414,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -424,7 +424,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int) outputColumnNames: _col0 @@ -432,7 +432,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -450,7 +450,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 @@ -458,13 +458,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 0] - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -489,7 +489,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -499,7 +499,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -507,7 +507,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -579,7 +579,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -589,7 +589,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -597,7 +597,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -629,7 +629,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -639,7 +639,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 @@ -647,7 +647,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -666,13 +666,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -737,7 +737,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -747,7 +747,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 @@ -755,7 +755,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -774,7 +774,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), (_col3 * 2) (type: int), (_col0 * 5) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 @@ -783,13 +783,13 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3, 4, 1] selectExpressions: LongColMultiplyLongScalar(col 0, val 2) -> 3:long, LongColMultiplyLongScalar(col 0, val 5) -> 4:long - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -814,7 +814,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -824,7 +824,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -832,7 +832,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -904,7 +904,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -914,7 +914,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 @@ -922,7 +922,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -941,7 +941,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -949,13 +949,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 1, 0] - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -980,7 +980,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -990,7 +990,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -998,7 +998,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -1070,7 +1070,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1080,7 +1080,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 @@ -1088,7 +1088,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1107,7 +1107,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -1115,13 +1115,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 2, 1] - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1146,7 +1146,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1156,7 +1156,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -1164,7 +1164,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -1236,7 +1236,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1246,7 +1246,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -1254,7 +1254,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -1286,7 +1286,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1296,7 +1296,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 @@ -1304,7 +1304,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1323,7 +1323,7 @@ STAGE PLANS: outputColumnNames: _col0, _col2, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col3 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -1331,13 +1331,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 1, 0] - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1402,7 +1402,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1412,7 +1412,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -1420,7 +1420,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -1452,7 +1452,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1462,7 +1462,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 @@ -1470,7 +1470,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1489,7 +1489,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -1497,13 +1497,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 2, 1] - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out b/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out index 8912bd47a4..2b3a26c5ff 100644 --- a/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out @@ -102,11 +102,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -119,14 +119,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -137,16 +137,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: llap LLAP IO: all inputs @@ -196,11 +196,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -213,14 +213,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -231,16 +231,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: llap LLAP IO: all inputs @@ -290,7 +290,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -301,7 +301,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -319,7 +319,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 @@ -327,13 +327,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3] - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -352,7 +352,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -363,7 +363,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 2] - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -430,7 +430,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -441,7 +441,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -459,7 +459,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 @@ -467,13 +467,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3] - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -492,7 +492,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -503,7 +503,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 2] - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -570,7 +570,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -581,7 +581,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -598,7 +598,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 @@ -606,13 +606,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3] - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -631,7 +631,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -642,7 +642,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 2] - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -709,7 +709,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -720,7 +720,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -737,7 +737,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 @@ -745,13 +745,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3] - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -770,7 +770,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -781,7 +781,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 2] - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + diff --git a/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out index d537297c85..2318d8c599 100644 --- a/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out @@ -84,6 +84,9 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-5, Stage-6, Stage-7 + Stage-10 depends on stages: Stage-5, Stage-6, Stage-7 Stage-1 depends on stages: Stage-4 Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -93,6 +96,11 @@ STAGE PLANS: Stage: Stage-3 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -114,6 +122,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Filter Operator predicate: ((rn >= 100) and (rn < 1000)) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -129,6 +150,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn2 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Filter Operator predicate: (rn >= 1000) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -144,16 +178,87 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 - Execution mode: vectorized, llap + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -171,6 +276,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn2 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out index 5017c00239..92c5bcda3a 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out @@ -84,7 +84,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -95,7 +95,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -115,13 +115,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 1674 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 1674 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -146,7 +146,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -157,7 +157,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -234,7 +234,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -245,7 +245,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -277,7 +277,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -288,7 +288,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -308,13 +308,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 1674 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 1674 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out index f963a62449..578d6cb374 100644 --- a/ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out @@ -98,7 +98,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inventory_part_0 - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4] @@ -109,7 +109,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: @@ -159,13 +159,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -267,7 +267,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inventory_part_1 - Statistics: Num rows: 200 Data size: 13476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5] @@ -278,7 +278,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 200 Data size: 13476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: @@ -328,13 +328,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -436,7 +436,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inventory_part_2a - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4] @@ -447,7 +447,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: @@ -497,13 +497,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -592,7 +592,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inventory_part_2b - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 792 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5] @@ -603,7 +603,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 792 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: @@ -653,13 +653,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -748,7 +748,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inventory_part_3 - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4] @@ -759,7 +759,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: @@ -809,13 +809,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index e8444fcbf4..3529efff32 100644 --- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -944,7 +944,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_orc_partitioned_date - Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5] @@ -955,13 +955,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1179,7 +1179,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_orc_partitioned_date - Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5] @@ -1190,7 +1190,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ @@ -1228,13 +1228,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ @@ -1262,19 +1262,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index fb9e1211ce..e76db77f57 100644 --- a/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -122,7 +122,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 101753 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -134,19 +134,19 @@ STAGE PLANS: native: true projectedOutputColumns: [7, 12, 11] selectExpressions: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 12:String_Family, StringGroupColConcatStringScalar(col 13, val |)(children: StringScalarConcatStringGroupCol(val |, col 11)(children: StringRTrim(col 13)(children: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 13:String_Family) -> 11:String) -> 13:String_Family) -> 11:String_Family - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 487785 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 9300 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 9300 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out b/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out index d583f09cb1..1a2a3c9df4 100644 --- a/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out @@ -59,7 +59,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_1 - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -69,7 +69,7 @@ STAGE PLANS: native: true predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> boolean predicate: (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean) - Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: string), lineid (type: string) outputColumnNames: _col0, _col1 @@ -77,13 +77,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -182,7 +182,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_1 - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -194,13 +194,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 3] selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> 3:boolean - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -315,7 +315,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -325,7 +325,7 @@ STAGE PLANS: native: true predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> boolean predicate: (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), lineid (type: int) outputColumnNames: _col0, _col1 @@ -333,13 +333,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -438,7 +438,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -450,13 +450,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 3] selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> 3:boolean - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -571,7 +571,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_3 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -581,7 +581,7 @@ STAGE PLANS: native: true predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> boolean predicate: (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean) - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: string), lineid (type: int) outputColumnNames: _col0, _col1 @@ -589,13 +589,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -694,7 +694,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_3 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -706,13 +706,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 3] selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> 3:boolean - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -830,7 +830,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_4 - Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -840,7 +840,7 @@ STAGE PLANS: native: true predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1, col 2], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2]) -> boolean predicate: (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean) - Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double) outputColumnNames: _col0, _col1, _col2 @@ -848,13 +848,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -956,7 +956,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_4 - Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -968,13 +968,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 2, 4] selectExpressions: StructColumnInList(structExpressions [col 0, col 1, col 2], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2]) -> 4:boolean - Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 0dd278e24c..e4cdcb1b58 100644 --- a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -57,17 +57,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: concat(c1, c2) (type: string), concat(c3, c4) (type: varchar(30)), (concat(c1, c2) = UDFToString(concat(c3, c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -125,17 +125,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: upper(c2) (type: string), upper(c4) (type: varchar(20)), (upper(c2) = UDFToString(upper(c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -193,17 +193,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: lower(c2) (type: string), lower(c4) (type: varchar(20)), (lower(c2) = UDFToString(lower(c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -261,17 +261,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ascii(c2) (type: int), ascii(c4) (type: int), (ascii(c2) = ascii(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -329,17 +329,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: concat_ws('|', c1, c2) (type: string), concat_ws('|', c3, c4) (type: string), (concat_ws('|', c1, c2) = concat_ws('|', c3, c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -397,17 +397,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: decode(encode(c2,'US-ASCII'),'US-ASCII') (type: string), decode(encode(c4,'US-ASCII'),'US-ASCII') (type: string), (decode(encode(c2,'US-ASCII'),'US-ASCII') = decode(encode(c4,'US-ASCII'),'US-ASCII')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -465,17 +465,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: instr(c2, '_') (type: int), instr(c4, '_') (type: int), (instr(c2, '_') = instr(c4, '_')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -533,17 +533,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: replace(c1, '_', c2) (type: string), replace(c3, '_', c4) (type: string), (replace(c1, '_', c2) = replace(c3, '_', c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -601,17 +601,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: reverse(c2) (type: string), reverse(c4) (type: string), (reverse(c2) = reverse(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -669,17 +669,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: next_day(d1, 'TU') (type: string), next_day(d4, 'WE') (type: string), (next_day(d1, 'TU') = next_day(d4, 'WE')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -737,17 +737,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: months_between(d1, d3) (type: double), months_between(d2, d4) (type: double), (months_between(d1, d3) = months_between(d2, d4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -805,17 +805,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: length(c2) (type: int), length(c4) (type: int), (length(c2) = length(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -941,17 +941,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: lpad(c2, 15, ' ') (type: string), lpad(c4, 15, ' ') (type: string), (lpad(c2, 15, ' ') = lpad(c4, 15, ' ')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1009,17 +1009,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ltrim(c2) (type: string), ltrim(c4) (type: string), (ltrim(c2) = ltrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1077,17 +1077,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1145,17 +1145,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1213,17 +1213,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1281,17 +1281,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: reverse(c2) (type: string), reverse(c4) (type: string), (reverse(c2) = reverse(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1349,17 +1349,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: rpad(c2, 15, ' ') (type: string), rpad(c4, 15, ' ') (type: string), (rpad(c2, 15, ' ') = rpad(c4, 15, ' ')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1417,17 +1417,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: rtrim(c2) (type: string), rtrim(c4) (type: string), (rtrim(c2) = rtrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1547,17 +1547,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: split(c2, '_') (type: array), split(c4, '_') (type: array) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1677,17 +1677,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: substr(c2, 1, 3) (type: string), substr(c4, 1, 3) (type: string), (substr(c2, 1, 3) = substr(c4, 1, 3)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1745,17 +1745,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: trim(c2) (type: string), trim(c4) (type: string), (trim(c2) = trim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1814,19 +1814,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(_col0, 16), compute_stats(_col2, 16) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: all inputs @@ -1837,10 +1837,10 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1895,19 +1895,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: c2, c4 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(c2), min(c4) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1918,10 +1918,10 @@ STAGE PLANS: aggregations: min(VALUE._col0), min(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1976,19 +1976,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: c2, c4 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(c2), max(c4) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1999,10 +1999,10 @@ STAGE PLANS: aggregations: max(VALUE._col0), max(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_udf_character_length.q.out b/ql/src/test/results/clientpositive/llap/vector_udf_character_length.q.out index 559a82b1a4..c88a6fcf86 100644 --- a/ql/src/test/results/clientpositive/llap/vector_udf_character_length.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_udf_character_length.q.out @@ -47,11 +47,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -70,8 +74,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Execution mode: vectorized, llap + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -89,6 +121,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) PREHOOK: type: QUERY PREHOOK: Input: default@src1 diff --git a/ql/src/test/results/clientpositive/llap/vector_udf_octet_length.q.out b/ql/src/test/results/clientpositive/llap/vector_udf_octet_length.q.out index cee832212e..b1037c48a4 100644 --- a/ql/src/test/results/clientpositive/llap/vector_udf_octet_length.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_udf_octet_length.q.out @@ -30,11 +30,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -53,8 +57,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Execution mode: vectorized, llap + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -72,6 +104,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) PREHOOK: type: QUERY PREHOOK: Input: default@src1 diff --git a/ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out b/ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out index 5979f8be7f..83f7624a6a 100644 --- a/ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out @@ -136,50 +136,74 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.varchar_lazy_binary_columnar - Execution mode: vectorized, llap + Select Operator + expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: varchar(20)), _col3 (type: varchar(30)), _col4 (type: varchar(20)), _col5 (type: varchar(20)), _col6 (type: varchar(50)) + outputColumnNames: vt, vsi, vi, vb, vf, vd, vs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(vt, 16), compute_stats(vsi, 16), compute_stats(vi, 16), compute_stats(vb, 16), compute_stats(vf, 16), compute_stats(vd, 16), compute_stats(vs, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -197,3 +221,10 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: vt, vsi, vi, vb, vf, vd, vs + Column Types: varchar(10), varchar(10), varchar(20), varchar(30), varchar(20), varchar(20), varchar(50) + Table: default.varchar_lazy_binary_columnar + diff --git a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out index 4e48a27e71..99c576ebf5 100644 --- a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out @@ -75,15 +75,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_2 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: varchar(10)), value (type: varchar(20)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: varchar(10)) sort order: + - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: varchar(20)) Execution mode: vectorized, llap @@ -109,13 +109,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 - Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -195,15 +195,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_2 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: varchar(10)), value (type: varchar(20)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: varchar(10)) sort order: - - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: varchar(20)) Execution mode: vectorized, llap @@ -229,13 +229,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 - Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -301,6 +301,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -308,6 +309,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -412,6 +414,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: field + Column Types: varchar(25) + Table: default.varchar_3 + PREHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git a/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out b/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out index 06dde804dc..e169e170d9 100644 --- a/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out @@ -40,7 +40,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_case_groupby - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -52,7 +52,7 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 3] selectExpressions: VectorUDFAdaptor(CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END)(children: NotCol(col 1) -> 2:boolean) -> 3:int - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) Group By Vectorization: @@ -65,7 +65,7 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -108,13 +108,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out b/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out index 85c4dd0f63..510c5e1599 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out @@ -70,15 +70,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_part - Statistics: Num rows: 200 Data size: 56096 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (cdouble + 2.0) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 56096 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 200 Data size: 56096 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -103,13 +103,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 56096 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 2800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 2800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out index 855d2e8beb..c3426c9b70 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out @@ -120,7 +120,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -130,7 +130,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: int) sort order: + @@ -155,7 +155,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -165,7 +165,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: int) sort order: + @@ -196,14 +196,14 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -255,7 +255,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -265,7 +265,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: int) sort order: + @@ -290,15 +290,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE value expressions: value (type: string) Execution mode: llap LLAP IO: no inputs @@ -316,14 +316,14 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -375,7 +375,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -385,7 +385,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: int) sort order: + @@ -410,7 +410,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -420,7 +420,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: int) sort order: + @@ -451,14 +451,14 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_context.q.out b/ql/src/test/results/clientpositive/llap/vectorized_context.q.out index 855a50f91c..1f5a8ff133 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_context.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_context.q.out @@ -109,79 +109,51 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6075 Data size: 72744 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (ss_store_sk is not null and ss_hdemo_sk is not null) (type: boolean) - Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6075 Data size: 72744 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_store_sk (type: int), ss_hdemo_sk (type: int), ss_net_profit (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: double) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Map 2 - Map Operator Tree: - TableScan - alias: store - Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s_store_sk (type: int), s_city (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6075 Data size: 72744 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 + outputColumnNames: _col0, _col2 input vertices: - 0 Map 1 - Statistics: Num rows: 6682 Data size: 80009 Basic stats: COMPLETE Column stats: NONE + 1 Map 2 + Statistics: Num rows: 6391 Data size: 76692 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col2, _col5 input vertices: 1 Map 3 - Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6724 Data size: 712744 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col4 (type: string), _col2 (type: double) + expressions: _col5 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6724 Data size: 712744 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 1100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 10600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 1100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 10600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -196,23 +168,51 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 3 + Map 2 Map Operator Tree: TableScan alias: household_demographics - Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: hd_demo_sk is not null (type: boolean) - Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: store + Statistics: Num rows: 6075 Data size: 615730 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 6075 Data size: 615730 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int), s_city (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6075 Data size: 615730 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6075 Data size: 615730 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index cf2db94991..e3c8b206b7 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -40,24 +40,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: dtest - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: a - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: a (type: int) mode: final outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -83,10 +83,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out index c31934bd07..fae2b7148a 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out @@ -62,11 +62,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -76,10 +76,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -90,16 +90,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -162,11 +162,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -179,10 +179,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -193,19 +193,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -269,11 +269,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -286,10 +286,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -300,17 +300,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -370,16 +370,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -387,11 +387,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -401,10 +401,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 0 Map 1 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -465,11 +465,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -482,10 +482,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -496,14 +496,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -572,11 +572,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -587,10 +587,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -601,14 +601,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -680,11 +680,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -695,10 +695,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -709,14 +709,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -786,11 +786,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -801,10 +801,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -815,14 +815,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -888,11 +888,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -903,10 +903,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -917,16 +917,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -993,14 +993,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1008,11 +1008,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -1023,10 +1023,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1101,14 +1101,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1116,11 +1116,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -1131,10 +1131,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1204,14 +1204,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1219,11 +1219,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -1234,10 +1234,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1308,16 +1308,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1325,11 +1325,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -1340,10 +1340,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1411,14 +1411,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1426,14 +1426,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1448,10 +1448,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1524,14 +1524,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1539,14 +1539,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1561,10 +1561,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1635,14 +1635,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1650,14 +1650,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1672,10 +1672,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1744,16 +1744,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1761,16 +1761,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1785,10 +1785,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out index 13eae75119..092942c73d 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out @@ -141,22 +141,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypes_parquet - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble) keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 16628 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 16628 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) Execution mode: vectorized, llap LLAP IO: no inputs @@ -181,10 +181,10 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 3328 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 3328 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index 6cd31dbce3..da30bb5926 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -274,7 +274,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_types - Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4906 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -285,7 +285,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 10] - Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4906 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble), max(cdecimal) Group By Vectorization: @@ -299,12 +299,12 @@ STAGE PLANS: keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2)) Execution mode: vectorized, llap LLAP IO: no inputs @@ -329,11 +329,11 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + - Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2)) Reducer 3 Execution mode: vectorized, llap @@ -352,13 +352,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] - Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index 5c849f67cb..2de3daf37a 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -220,7 +220,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -812,7 +812,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1249,7 +1249,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -2092,7 +2092,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -2125,7 +2125,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3317,7 +3317,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -4310,7 +4310,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -4603,7 +4603,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5550,7 +5550,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5590,7 +5590,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5816,7 +5816,7 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2, _col1 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5883,7 +5883,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index 82d551848c..034e2b676c 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -34,7 +34,7 @@ Stage-0 Select Operator [SEL_1] (rows=2 width=40) Output:["_col0"] TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] + default@test,test,Tbl:COMPLETE,Col:COMPLETE,Output:["ts"] PREHOOK: query: SELECT ts FROM test PREHOOK: type: QUERY @@ -63,7 +63,7 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=1 width=80) + Select Operator [SEL_5] (rows=1 width=92) Output:["_col0","_col1","_col2"] Group By Operator [GBY_4] (rows=1 width=80) Output:["_col0","_col1"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"] @@ -74,7 +74,7 @@ Stage-0 Select Operator [SEL_1] (rows=2 width=40) Output:["ts"] TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] + default@test,test,Tbl:COMPLETE,Col:COMPLETE,Output:["ts"] PREHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY @@ -104,7 +104,7 @@ Stage-0 Filter Operator [FIL_4] (rows=1 width=40) predicate:(ts) IN (0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0) TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] + default@test,test,Tbl:COMPLETE,Col:COMPLETE,Output:["ts"] PREHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000') PREHOOK: type: QUERY @@ -132,7 +132,7 @@ Stage-0 Select Operator [SEL_3] (rows=2 width=40) Output:["_col0"] TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] + default@test,test,Tbl:COMPLETE,Col:COMPLETE,Output:["ts"] PREHOOK: query: SELECT ts FROM test PREHOOK: type: QUERY @@ -172,7 +172,7 @@ Stage-0 Select Operator [SEL_7] (rows=2 width=40) Output:["ts"] TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] + default@test,test,Tbl:COMPLETE,Col:COMPLETE,Output:["ts"] PREHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY @@ -202,7 +202,7 @@ Stage-0 Filter Operator [FIL_5] (rows=1 width=40) predicate:(ts) IN (0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0) TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] + default@test,test,Tbl:COMPLETE,Col:COMPLETE,Output:["ts"] PREHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000') PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index e326f5f79e..1b2dcb88cd 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -109,7 +109,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -121,7 +121,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFYearTimestamp(col 0, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 7:long, VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 8:long, VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 9:long, VectorUDFSecondTimestamp(col 0, field SECOND) -> 10:long - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -158,13 +158,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -292,7 +292,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -304,7 +304,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] selectExpressions: VectorUDFUnixTimeStampString(col 1) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 6:long, VectorUDFWeekOfYearString(col 1) -> 7:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 8:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 9:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 10:long - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -341,13 +341,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -475,7 +475,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1684 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -487,7 +487,7 @@ STAGE PLANS: native: true projectedOutputColumns: [4, 5, 6, 7, 8, 9, 10, 11, 12] selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFUnixTimeStampString(col 1) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 0, field YEAR) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 0, field MONTH) -> 2:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearString(col 1) -> 3:long) -> 9:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 2:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 3:long) -> 10:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 2:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 3:long) -> 11:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFSecondTimestamp(col 0, field SECOND) -> 2:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 3:long) -> 12:long - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -524,13 +524,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -658,7 +658,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_wrong - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 309 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -670,7 +670,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9] selectExpressions: VectorUDFUnixTimeStampString(col 0) -> 1:long, VectorUDFYearString(col 0, fieldStart 0, fieldLength 4) -> 2:long, VectorUDFMonthString(col 0, fieldStart 5, fieldLength 2) -> 3:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFWeekOfYearString(col 0) -> 6:long, VectorUDFHourString(col 0, fieldStart 11, fieldLength 2) -> 7:long, VectorUDFMinuteString(col 0, fieldStart 14, fieldLength 2) -> 8:long, VectorUDFSecondString(col 0, fieldStart 17, fieldLength 2) -> 9:long - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -707,13 +707,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -792,7 +792,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -803,7 +803,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() Group By Vectorization: @@ -853,13 +853,13 @@ STAGE PLANS: projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -918,19 +918,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctimestamp1) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double) Execution mode: llap LLAP IO: all inputs @@ -960,7 +960,7 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: round(_col0, 3) (type: double) outputColumnNames: _col0 @@ -969,13 +969,13 @@ STAGE PLANS: native: true projectedOutputColumns: [1] selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 3) -> 1:double - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1042,7 +1042,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1053,7 +1053,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) Group By Vectorization: @@ -1065,10 +1065,10 @@ STAGE PLANS: vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1092,14 +1092,14 @@ STAGE PLANS: aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: round(_col0, 0) (type: double), _col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19 (type: boolean), round(_col4, 3) (type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), round(_col7, 3) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap_acid.q.out b/ql/src/test/results/clientpositive/llap_acid.q.out index 5970fd78cb..e04af0e344 100644 --- a/ql/src/test/results/clientpositive/llap_acid.q.out +++ b/ql/src/test/results/clientpositive/llap_acid.q.out @@ -91,18 +91,18 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: int) sort order: ++ - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized LLAP IO: may be used (ACID table) @@ -110,10 +110,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -141,24 +141,26 @@ POSTHOOK: Input: default@orc_llap@csmallint=1 POSTHOOK: Input: default@orc_llap@csmallint=2 POSTHOOK: Input: default@orc_llap@csmallint=3 #### A masked pattern was here #### --285355633 1 -1241163445 --109813638 1 -58941842 -164554497 1 1161977292 -199879534 1 123351087 -246423894 1 -1645852809 -354670578 1 562841852 -455419170 1 1108177470 -665801232 1 480783141 -708885482 1 -1645852809 --285355633 2 -1241163445 --109813638 2 -58941842 -164554497 2 1161977292 -199879534 2 123351087 -246423894 2 -1645852809 -354670578 2 562841852 -455419170 2 1108177470 -665801232 2 480783141 -708885482 2 -1645852809 +-970918963 1 -588508542 +-734267047 1 895807844 +-548534304 1 -1900081338 +-546972460 1 665899329 +-469581869 1 1033373031 +-101217409 1 183045850 +-37908611 1 -1378658304 +274816197 1 -437339127 +371876492 1 1862746855 +927956889 1 -935575737 +-970918963 2 -588508542 +-734267047 2 895807844 +-548534304 2 -1900081338 +-546972460 2 665899329 +-469581869 2 1033373031 +-101217409 2 183045850 +-37908611 2 -1378658304 +274816197 2 -437339127 +371876492 2 1862746855 +927956889 2 -935575737 -923308739 3 -1887561756 -3728 3 -1887561756 762 3 -1645852809 @@ -216,18 +218,18 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: int) sort order: ++ - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized LLAP IO: may be used (ACID table) @@ -235,10 +237,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -266,25 +268,27 @@ POSTHOOK: Input: default@orc_llap@csmallint=1 POSTHOOK: Input: default@orc_llap@csmallint=2 POSTHOOK: Input: default@orc_llap@csmallint=3 #### A masked pattern was here #### --285355633 1 -1241163445 --109813638 1 -58941842 +-970918963 1 -588508542 +-734267047 1 895807844 +-548534304 1 -1900081338 +-546972460 1 665899329 +-469581869 1 1033373031 +-101217409 1 183045850 +-37908611 1 -1378658304 1 1 2 -164554497 1 1161977292 -199879534 1 123351087 -246423894 1 -1645852809 -354670578 1 562841852 -455419170 1 1108177470 -665801232 1 480783141 -708885482 1 -1645852809 --285355633 2 -1241163445 --109813638 2 -58941842 -164554497 2 1161977292 -199879534 2 123351087 -246423894 2 -1645852809 -354670578 2 562841852 -455419170 2 1108177470 -665801232 2 480783141 -708885482 2 -1645852809 +274816197 1 -437339127 +371876492 1 1862746855 +927956889 1 -935575737 +-970918963 2 -588508542 +-734267047 2 895807844 +-548534304 2 -1900081338 +-546972460 2 665899329 +-469581869 2 1033373031 +-101217409 2 183045850 +-37908611 2 -1378658304 +274816197 2 -437339127 +371876492 2 1862746855 +927956889 2 -935575737 -923308739 3 -1887561756 -3728 3 -1887561756 762 3 -1645852809 diff --git a/ql/src/test/results/clientpositive/load_dyn_part1.q.out b/ql/src/test/results/clientpositive/load_dyn_part1.q.out index 84d806d3a9..01f9446cd2 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part1.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part1.q.out @@ -58,16 +58,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -91,6 +89,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -106,6 +120,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -132,6 +180,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -162,15 +224,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -191,31 +244,30 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git a/ql/src/test/results/clientpositive/load_dyn_part10.q.out b/ql/src/test/results/clientpositive/load_dyn_part10.q.out index 99d357217d..bbfa0bb3e9 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part10.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part10.q.out @@ -46,6 +46,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part10 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-12-31' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -83,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part10 + PREHOOK: query: from srcpart insert overwrite table nzhang_part10 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/load_dyn_part13.q.out b/ql/src/test/results/clientpositive/load_dyn_part13.q.out index 9e0ac6fee2..742dcf6e6d 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part13.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part13.q.out @@ -60,6 +60,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -85,6 +86,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -105,6 +122,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -122,6 +173,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part13 + PREHOOK: query: insert overwrite table nzhang_part13 partition (ds="2010-03-03", hr) select * from ( select key, value, '22' diff --git a/ql/src/test/results/clientpositive/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/load_dyn_part14.q.out index a6a5c63cc5..bfc0c0efb7 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part14.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part14.q.out @@ -50,6 +50,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -71,7 +72,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 2 @@ -101,6 +101,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) TableScan Union Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE @@ -112,6 +128,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) TableScan Union Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE @@ -123,6 +155,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -148,6 +214,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.nzhang_part14 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -192,7 +265,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 2 @@ -222,7 +294,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 2 diff --git a/ql/src/test/results/clientpositive/load_dyn_part2.q.out b/ql/src/test/results/clientpositive/load_dyn_part2.q.out index 93778a22e3..e80a26279e 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part2.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part2.q.out @@ -38,6 +38,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -68,6 +70,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -85,6 +103,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part_bucket + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr) select key, value, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/load_dyn_part3.q.out b/ql/src/test/results/clientpositive/load_dyn_part3.q.out index 3849100785..4542fe01d9 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part3.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part3.q.out @@ -44,6 +44,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -64,6 +65,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -81,6 +116,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part3 + PREHOOK: query: insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/load_dyn_part4.q.out b/ql/src/test/results/clientpositive/load_dyn_part4.q.out index 40b0bbbe8c..6c363467c3 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part4.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part4.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part4 + PREHOOK: query: insert overwrite table nzhang_part4 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/load_dyn_part8.q.out b/ql/src/test/results/clientpositive/load_dyn_part8.q.out index cb1a757051..681d962ed0 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part8.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part8.q.out @@ -48,8 +48,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-4, Stage-5 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -96,6 +99,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (ds > '2008-04-08') (type: boolean) @@ -134,6 +156,34 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -334,6 +384,40 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -367,6 +451,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false + Stage: Stage-1 Move Operator tables: @@ -399,6 +499,82 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: from srcpart insert overwrite table nzhang_part8 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' insert overwrite table nzhang_part8 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' diff --git a/ql/src/test/results/clientpositive/load_dyn_part9.q.out b/ql/src/test/results/clientpositive/load_dyn_part9.q.out index 414e784309..8955bf5d10 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part9.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part9.q.out @@ -46,6 +46,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -83,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part9 + PREHOOK: query: from srcpart insert overwrite table nzhang_part9 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/mapjoin_hook.q.out b/ql/src/test/results/clientpositive/mapjoin_hook.q.out index a9f9be3a4d..bd5909125f 100644 --- a/ql/src/test/results/clientpositive/mapjoin_hook.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_hook.q.out @@ -13,20 +13,24 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 -RUN: Stage-6:MAPREDLOCAL +RUN: Stage-7:MAPREDLOCAL RUN: Stage-2:MAPRED RUN: Stage-0:MOVE +RUN: Stage-4:MAPRED RUN: Stage-3:STATS +RUN: Stage-8:COLUMNSTATS PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 -RUN: Stage-6:MAPREDLOCAL -RUN: Stage-5:MAPRED +RUN: Stage-7:MAPREDLOCAL +RUN: Stage-6:MAPRED RUN: Stage-0:MOVE +RUN: Stage-3:MAPRED RUN: Stage-2:STATS +RUN: Stage-8:COLUMNSTATS PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') @@ -38,14 +42,14 @@ PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 1 BACKUP_COMMON_JOIN: 1 -RUN: Stage-6:CONDITIONAL -RUN: Stage-7:MAPREDLOCAL -RUN: Stage-1:MAPRED +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 1 CONVERTED_MAPJOIN_LOCAL: 1 BACKUP_COMMON_JOIN: 0 +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL +RUN: Stage-5:MAPRED RUN: Stage-0:MOVE +RUN: Stage-3:MAPRED RUN: Stage-2:STATS +RUN: Stage-10:COLUMNSTATS PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY @@ -53,14 +57,14 @@ PREHOOK: Input: default@src PREHOOK: Output: default@dest1 FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 2 BACKUP_COMMON_JOIN: 2 -RUN: Stage-11:CONDITIONAL -RUN: Stage-14:MAPREDLOCAL +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 1 CONVERTED_MAPJOIN_LOCAL: 2 BACKUP_COMMON_JOIN: 1 +RUN: Stage-12:CONDITIONAL +RUN: Stage-15:MAPREDLOCAL RUN: Stage-1:MAPRED -RUN: Stage-8:CONDITIONAL -RUN: Stage-12:MAPREDLOCAL -RUN: Stage-2:MAPRED +RUN: Stage-9:CONDITIONAL +RUN: Stage-13:MAPREDLOCAL +RUN: Stage-7:MAPRED RUN: Stage-0:MOVE +RUN: Stage-4:MAPRED RUN: Stage-3:STATS +RUN: Stage-17:COLUMNSTATS diff --git a/ql/src/test/results/clientpositive/mapreduce1.q.out b/ql/src/test/results/clientpositive/mapreduce1.q.out index 3d0a156557..2660486535 100644 --- a/ql/src/test/results/clientpositive/mapreduce1.q.out +++ b/ql/src/test/results/clientpositive/mapreduce1.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce2.q.out b/ql/src/test/results/clientpositive/mapreduce2.q.out index 676c387c7a..25469c4920 100644 --- a/ql/src/test/results/clientpositive/mapreduce2.q.out +++ b/ql/src/test/results/clientpositive/mapreduce2.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce3.q.out b/ql/src/test/results/clientpositive/mapreduce3.q.out index fc1a402b68..0d80be1796 100644 --- a/ql/src/test/results/clientpositive/mapreduce3.q.out +++ b/ql/src/test/results/clientpositive/mapreduce3.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce4.q.out b/ql/src/test/results/clientpositive/mapreduce4.q.out index 17fa029ad4..8d5182a097 100644 --- a/ql/src/test/results/clientpositive/mapreduce4.q.out +++ b/ql/src/test/results/clientpositive/mapreduce4.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce5.q.out b/ql/src/test/results/clientpositive/mapreduce5.q.out index 21103f88df..0bdae3da27 100644 --- a/ql/src/test/results/clientpositive/mapreduce5.q.out +++ b/ql/src/test/results/clientpositive/mapreduce5.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +57,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -69,6 +86,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key as c1, CAST(src.key / 10 AS INT) as c2, CAST(src.key % 10 AS INT) as c3, src.value as c4 diff --git a/ql/src/test/results/clientpositive/mapreduce6.q.out b/ql/src/test/results/clientpositive/mapreduce6.q.out index fe4e631077..df1a9c6293 100644 --- a/ql/src/test/results/clientpositive/mapreduce6.q.out +++ b/ql/src/test/results/clientpositive/mapreduce6.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +57,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -69,6 +86,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, CAST(src.key / 10 AS INT) as c2, CAST(src.key % 10 AS INT) as c3, src.value diff --git a/ql/src/test/results/clientpositive/mapreduce7.q.out b/ql/src/test/results/clientpositive/mapreduce7.q.out index cc97887fd8..4de831ab57 100644 --- a/ql/src/test/results/clientpositive/mapreduce7.q.out +++ b/ql/src/test/results/clientpositive/mapreduce7.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + outputColumnNames: k, v, key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k, 16), compute_stats(v, 16), compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k, v, key, ten, one, value + Column Types: string, string, int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.*, src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce8.q.out b/ql/src/test/results/clientpositive/mapreduce8.q.out index b1763c792f..7a57081b2e 100644 --- a/ql/src/test/results/clientpositive/mapreduce8.q.out +++ b/ql/src/test/results/clientpositive/mapreduce8.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + outputColumnNames: k, v, key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k, 16), compute_stats(v, 16), compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k, v, key, ten, one, value + Column Types: string, string, int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.*, src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/merge1.q.out b/ql/src/test/results/clientpositive/merge1.q.out index 2487bf73e2..8e38bd8d74 100644 --- a/ql/src/test/results/clientpositive/merge1.q.out +++ b/ql/src/test/results/clientpositive/merge1.q.out @@ -20,9 +20,11 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +68,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -89,6 +106,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -119,6 +143,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table dest1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -504,6 +550,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -527,6 +574,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -550,6 +623,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -605,6 +685,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -628,6 +709,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -651,6 +758,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge2.q.out b/ql/src/test/results/clientpositive/merge2.q.out index a8b4bd539f..90fe0f874e 100644 --- a/ql/src/test/results/clientpositive/merge2.q.out +++ b/ql/src/test/results/clientpositive/merge2.q.out @@ -20,9 +20,11 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +68,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -89,6 +106,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.test1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -119,6 +143,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table test1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -504,6 +550,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -527,6 +574,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -550,6 +623,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -605,6 +685,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -628,6 +709,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -651,6 +758,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge3.q.out b/ql/src/test/results/clientpositive/merge3.q.out index 1eb5f0e03a..86b976f9c7 100644 --- a/ql/src/test/results/clientpositive/merge3.q.out +++ b/ql/src/test/results/clientpositive/merge3.q.out @@ -2376,6 +2376,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -2421,6 +2422,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2432,7 +2452,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2479,7 +2499,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2521,6 +2541,40 @@ STAGE PLANS: Truncated Path -> Alias: /merge_src_part/ds=2008-04-08 [merge_src_part] /merge_src_part/ds=2008-04-09 [merge_src_part] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -2562,6 +2616,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_src_part2 + Is Table Level Stats: false + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -4797,6 +4859,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -4814,8 +4877,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - null sort order: - sort order: + key expressions: _col2 (type: string) + null sort order: a + sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE tag: -1 @@ -4832,7 +4896,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4879,7 +4943,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4956,6 +5020,42 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4997,6 +5097,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_src_part2 + Is Table Level Stats: false + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge4.q.out b/ql/src/test/results/clientpositive/merge4.q.out index 182c6a887e..c73150d1b3 100644 --- a/ql/src/test/results/clientpositive/merge4.q.out +++ b/ql/src/test/results/clientpositive/merge4.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -41,6 +42,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,6 +102,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1138,6 +1180,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1161,6 +1204,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -1187,6 +1264,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2763,9 +2847,11 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-2, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -2786,7 +2872,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) TableScan alias: src @@ -2803,7 +2888,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Select Operator @@ -2821,6 +2905,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1 Data size: 353 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1167 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -2847,6 +2947,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2877,6 +2984,35 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1167 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1143 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1143 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1143 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table nzhang_part partition (ds='2010-08-15', hr) select * from ( select key, value, hr from srcpart where ds='2008-04-08' diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out index a777fe0830..a3ec1e13dd 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out @@ -57,6 +57,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +78,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +129,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + PREHOOK: query: insert overwrite table merge_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart_merge_dp where ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart_merge_dp @@ -645,6 +687,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -668,6 +711,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -694,6 +771,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1275,6 +1359,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1298,6 +1383,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -1324,6 +1443,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out index 5a2afb01ac..e68cd55618 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out @@ -76,6 +76,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -99,6 +100,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -125,6 +160,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out index 055e07abd8..5df6d4a0c1 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out @@ -136,6 +136,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -159,6 +160,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -185,6 +220,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out index cbeaf42eaf..4d70e641db 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out @@ -137,6 +137,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -160,6 +161,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -186,6 +221,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out index 5a562f4456..347bd042f0 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out @@ -113,6 +113,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -136,6 +137,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -162,6 +197,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/mergejoin.q.out b/ql/src/test/results/clientpositive/mergejoin.q.out index 1961d83c68..ff5be66aeb 100644 --- a/ql/src/test/results/clientpositive/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/mergejoin.q.out @@ -1659,36 +1659,36 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 22748 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 242 Data size: 22748 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + alias: c + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 266 Data size: 25022 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1701,34 +1701,34 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 266 Data size: 25022 Basic stats: COMPLETE Column stats: NONE TableScan - alias: c - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 56870 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -2195,36 +2195,36 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 22748 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 242 Data size: 22748 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + alias: c + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 266 Data size: 25022 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2237,34 +2237,34 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 266 Data size: 25022 Basic stats: COMPLETE Column stats: NONE TableScan - alias: c - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 56870 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out index 82b4a419e1..ba5418e73a 100644 --- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out @@ -183,46 +183,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -233,46 +199,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -283,50 +215,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -337,50 +231,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/multi_insert_gby.q.out b/ql/src/test/results/clientpositive/multi_insert_gby.q.out index cb97e5871f..09fb2eedf5 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby.q.out @@ -30,8 +30,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -72,6 +76,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -93,6 +112,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -107,6 +141,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -117,9 +187,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE e1 SELECT key, COUNT(*) WHERE key>450 GROUP BY key @@ -211,8 +303,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -251,6 +347,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 450) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -272,6 +383,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -286,6 +412,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -296,9 +458,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE e1 SELECT key, COUNT(*) WHERE key>450 GROUP BY key diff --git a/ql/src/test/results/clientpositive/multi_insert_gby2.q.out b/ql/src/test/results/clientpositive/multi_insert_gby2.q.out index 476dfa7667..d44080c02e 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby2.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby2.q.out @@ -33,6 +33,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -92,6 +94,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: count + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(count, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator aggregations: percentile_approx(VALUE._col0, 0.5) mode: complete @@ -105,6 +127,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: double) + outputColumnNames: percentile + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(percentile, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -119,6 +161,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: count + Column Types: int + Table: default.e1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: percentile + Column Types: double + Table: default.e2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out index 32aec10e31..424bc66fe4 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out @@ -41,8 +41,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -103,6 +107,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -121,6 +140,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -135,6 +169,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -145,9 +215,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM (select key, cast(key as double) as keyD, value from src order by key) a INSERT OVERWRITE TABLE e2 @@ -167,8 +259,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -229,6 +325,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -247,6 +358,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -261,6 +387,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -271,9 +433,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select key, cast(key as double) as keyD, value from src order by key) a INSERT OVERWRITE TABLE e1 SELECT key, COUNT(distinct value) group by key @@ -1606,9 +1790,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -1667,6 +1855,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1681,11 +1884,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) @@ -1709,6 +1948,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -1720,9 +1974,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM (select key, cast(key as double) as keyD, value from src order by key) a INSERT OVERWRITE TABLE e1 @@ -1746,10 +2022,16 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-13 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-6 depends on stages: Stage-4 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -1797,6 +2079,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce @@ -1829,6 +2126,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -1847,6 +2159,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1861,6 +2188,49 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e3 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -1871,9 +2241,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Move Operator tables: @@ -1884,6 +2276,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/multi_insert_gby4.q.out b/ql/src/test/results/clientpositive/multi_insert_gby4.q.out index dd01b74da1..ed17fac68c 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby4.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby4.q.out @@ -44,10 +44,16 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-12 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 Stage-2 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -92,6 +98,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -113,6 +134,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 490) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -134,6 +170,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -148,6 +199,49 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e3 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -158,9 +252,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Move Operator tables: @@ -171,9 +287,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Stage: Stage-6 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (SELECT key, value FROM src) a INSERT OVERWRITE TABLE e1 SELECT key, COUNT(*) WHERE key>450 GROUP BY key diff --git a/ql/src/test/results/clientpositive/multi_insert_mixed.q.out b/ql/src/test/results/clientpositive/multi_insert_mixed.q.out index 9acae2ec54..67e434662e 100644 --- a/ql/src/test/results/clientpositive/multi_insert_mixed.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_mixed.q.out @@ -39,12 +39,16 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-5, Stage-8, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-5, Stage-8, Stage-9, Stage-10 + Stage-13 depends on stages: Stage-5, Stage-8, Stage-9, Stage-10 Stage-6 depends on stages: Stage-3 Stage-7 depends on stages: Stage-6 Stage-1 depends on stages: Stage-7 Stage-8 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 Stage-9 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -100,6 +104,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -136,6 +155,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -150,6 +189,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi3 + Stage: Stage-6 Map Reduce Map Operator Tree: @@ -196,6 +256,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -223,6 +303,28 @@ STAGE PLANS: Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select key, count(1) group by key order by key insert overwrite table src_multi2 select value, count(1) group by value order by value diff --git a/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out b/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out index af0ef54a74..20c59c2bb2 100644 --- a/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out @@ -29,8 +29,11 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-5, Stage-6 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -54,6 +57,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -69,6 +85,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -86,6 +130,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -99,6 +157,28 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -166,19 +246,17 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-2 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-3, Stage-10, Stage-11 + Stage-13 depends on stages: Stage-3, Stage-10, Stage-11 Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -202,6 +280,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -217,6 +308,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -243,6 +362,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -286,44 +419,27 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -390,11 +506,19 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-3, Stage-5, Stage-11 + Stage-13 depends on stages: Stage-3, Stage-5, Stage-11 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8 + Stage-7 + Stage-6 + Stage-8 + Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -418,6 +542,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -433,6 +570,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -450,6 +615,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -463,6 +642,67 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-10 + Conditional Operator + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -533,6 +773,8 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-17 depends on stages: Stage-3, Stage-10, Stage-16 + Stage-18 depends on stages: Stage-3, Stage-10, Stage-16 Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 Stage-5 @@ -543,6 +785,7 @@ STAGE DEPENDENCIES: Stage-11 Stage-13 Stage-14 depends on stages: Stage-13 + Stage-16 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -566,6 +809,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -581,6 +837,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -607,6 +891,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-18 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -689,6 +987,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -757,8 +1077,12 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-6, Stage-5, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-6, Stage-5, Stage-7 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -794,6 +1118,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -810,6 +1149,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Dependency Collection @@ -827,6 +1181,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -837,9 +1205,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value @@ -900,19 +1312,23 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-13, Stage-12, Stage-15 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-18 depends on stages: Stage-3, Stage-11, Stage-10, Stage-17 + Stage-19 depends on stages: Stage-3, Stage-11, Stage-10, Stage-17 Stage-1 depends on stages: Stage-4 - Stage-10 depends on stages: Stage-1 + Stage-11 depends on stages: Stage-1 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 + Stage-10 depends on stages: Stage-2 + Stage-16 depends on stages: Stage-2 , consists of Stage-13, Stage-12, Stage-14 Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-12 + Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-17 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -948,6 +1364,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -964,6 +1395,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-9 Conditional Operator @@ -990,6 +1436,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-18 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1000,7 +1460,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-10 + Stage: Stage-11 Stats-Aggr Operator Stage: Stage-5 @@ -1033,16 +1493,38 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-16 Conditional Operator - Stage: Stage-12 + Stage: Stage-13 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-11 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -1054,7 +1536,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -1066,12 +1548,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-14 + Stage: Stage-15 Move Operator files: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value @@ -1133,8 +1637,12 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-6, Stage-5, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-6, Stage-5, Stage-7 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1170,6 +1678,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1186,6 +1709,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Dependency Collection @@ -1203,6 +1741,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1213,9 +1765,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value @@ -1276,19 +1872,23 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-13, Stage-12, Stage-15 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-18 depends on stages: Stage-3, Stage-11, Stage-10, Stage-17 + Stage-19 depends on stages: Stage-3, Stage-11, Stage-10, Stage-17 Stage-1 depends on stages: Stage-4 - Stage-10 depends on stages: Stage-1 + Stage-11 depends on stages: Stage-1 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 + Stage-10 depends on stages: Stage-2 + Stage-16 depends on stages: Stage-2 , consists of Stage-13, Stage-12, Stage-14 Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-12 + Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-17 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1324,6 +1924,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1340,6 +1955,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-9 Conditional Operator @@ -1366,6 +1996,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-18 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1376,7 +2020,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-10 + Stage: Stage-11 Stats-Aggr Operator Stage: Stage-5 @@ -1409,16 +2053,38 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-16 Conditional Operator - Stage: Stage-12 + Stage: Stage-13 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-11 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -1430,7 +2096,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -1442,12 +2108,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-14 + Stage: Stage-15 Move Operator files: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value @@ -1509,8 +2197,11 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-5, Stage-6 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1536,6 +2227,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1547,6 +2251,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1567,6 +2286,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1578,6 +2310,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -1595,6 +2355,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1608,6 +2382,28 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -1694,19 +2490,17 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-2 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-13 depends on stages: Stage-3, Stage-10, Stage-11 + Stage-14 depends on stages: Stage-3, Stage-10, Stage-11 Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1732,6 +2526,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1743,6 +2550,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1763,6 +2585,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1774,6 +2609,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -1800,6 +2663,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1843,44 +2720,27 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1966,11 +2826,19 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-13 depends on stages: Stage-3, Stage-5, Stage-11 + Stage-14 depends on stages: Stage-3, Stage-5, Stage-11 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8 + Stage-7 + Stage-6 + Stage-8 + Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1996,6 +2864,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2007,6 +2888,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2027,6 +2923,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2038,6 +2947,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -2055,6 +2992,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -2068,39 +3019,100 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator -PREHOOK: query: from (select * from src union all select * from src) s -insert overwrite table src_multi1 select * where key < 10 -insert overwrite table src_multi2 select * where key > 10 and key < 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@src_multi1 -PREHOOK: Output: default@src_multi2 -POSTHOOK: query: from (select * from src union all select * from src) s -insert overwrite table src_multi1 select * where key < 10 -insert overwrite table src_multi2 select * where key > 10 and key < 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@src_multi1 -POSTHOOK: Output: default@src_multi2 -POSTHOOK: Lineage: src_multi1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_multi1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_multi2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select * from src_multi1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_multi1 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_multi1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_multi1 + Stage: Stage-10 + Conditional Operator + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -2 val_2 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: from (select * from src union all select * from src) s +insert overwrite table src_multi1 select * where key < 10 +insert overwrite table src_multi2 select * where key > 10 and key < 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_multi1 +PREHOOK: Output: default@src_multi2 +POSTHOOK: query: from (select * from src union all select * from src) s +insert overwrite table src_multi1 select * where key < 10 +insert overwrite table src_multi2 select * where key > 10 and key < 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_multi1 +POSTHOOK: Output: default@src_multi2 +POSTHOOK: Lineage: src_multi1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from src_multi1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_multi1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_multi1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_multi1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +2 val_2 2 val_2 4 val_4 4 val_4 @@ -2157,6 +3169,8 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-18 depends on stages: Stage-3, Stage-10, Stage-16 + Stage-19 depends on stages: Stage-3, Stage-10, Stage-16 Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 Stage-5 @@ -2167,6 +3181,7 @@ STAGE DEPENDENCIES: Stage-11 Stage-13 Stage-14 depends on stages: Stage-13 + Stage-16 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -2192,6 +3207,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2203,6 +3231,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2223,6 +3266,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2234,6 +3290,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -2260,6 +3344,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-18 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -2342,6 +3440,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -3273,14 +4393,18 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4, Stage-8 + Stage-6 depends on stages: Stage-4, Stage-9, Stage-10 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-5, Stage-7, Stage-8, Stage-2, Stage-3 + Stage-12 depends on stages: Stage-5, Stage-7, Stage-8, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 Stage-7 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-4 Stage-8 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-9 + Stage-10 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-4 @@ -3304,6 +4428,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3319,46 +4456,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-6 Dependency Collection @@ -3376,6 +4510,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -3389,13 +4537,77 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-2 Move Operator files: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-8 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan @@ -3497,14 +4709,23 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4, Stage-8 + Stage-6 depends on stages: Stage-4, Stage-9, Stage-8, Stage-11, Stage-14, Stage-15 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 + Stage-16 depends on stages: Stage-5, Stage-7, Stage-13, Stage-2, Stage-3 + Stage-17 depends on stages: Stage-5, Stage-7, Stage-13, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 Stage-7 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-8 + Stage-12 depends on stages: Stage-4 , consists of Stage-9, Stage-8, Stage-10 + Stage-9 + Stage-8 + Stage-10 + Stage-11 depends on stages: Stage-10 + Stage-13 depends on stages: Stage-4 + Stage-14 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-15 STAGE PLANS: Stage: Stage-4 @@ -3528,6 +4749,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3543,46 +4777,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-6 Dependency Collection @@ -3600,6 +4831,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -3613,22 +4858,125 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator - Stage: Stage-2 + Stage: Stage-12 + Conditional Operator + + Stage: Stage-9 Move Operator files: - hdfs directory: false + hdfs directory: true #### A masked pattern was here #### Stage: Stage-8 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-11 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -3723,22 +5071,21 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 Stage-8 - Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-4, Stage-18 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-4, Stage-14, Stage-15 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 + Stage-16 depends on stages: Stage-5, Stage-12, Stage-13, Stage-2, Stage-3 + Stage-17 depends on stages: Stage-5, Stage-12, Stage-13, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 Stage-12 depends on stages: Stage-1 Stage-7 Stage-9 Stage-10 depends on stages: Stage-9 - Stage-17 depends on stages: Stage-4 , consists of Stage-14, Stage-13, Stage-15 - Stage-14 - Stage-13 - Stage-15 - Stage-16 depends on stages: Stage-15 - Stage-2 depends on stages: Stage-4 - Stage-18 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-18 + Stage-13 depends on stages: Stage-4 + Stage-14 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-15 STAGE PLANS: Stage: Stage-4 @@ -3762,6 +5109,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3777,46 +5137,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-11 Conditional Operator @@ -3843,6 +5200,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -3886,44 +5257,69 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-17 - Conditional Operator - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-13 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-15 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-16 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -3931,7 +5327,7 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-18 + Stage: Stage-15 Map Reduce Map Operator Tree: TableScan @@ -4035,9 +5431,11 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 Stage-8 - Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-4, Stage-18 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-19, Stage-20 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 + Stage-21 depends on stages: Stage-5, Stage-12, Stage-18, Stage-2, Stage-3 + Stage-22 depends on stages: Stage-5, Stage-12, Stage-18, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 Stage-12 depends on stages: Stage-1 Stage-7 @@ -4048,9 +5446,11 @@ STAGE DEPENDENCIES: Stage-13 Stage-15 Stage-16 depends on stages: Stage-15 - Stage-2 depends on stages: Stage-4 Stage-18 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-18 + Stage-19 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-19 + Stage-20 depends on stages: Stage-19 + Stage-3 depends on stages: Stage-20 STAGE PLANS: Stage: Stage-4 @@ -4074,6 +5474,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -4089,46 +5502,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-11 Conditional Operator @@ -4155,6 +5565,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-21 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-22 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -4237,13 +5661,77 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-18 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-19 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-2 Move Operator files: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-18 + Stage: Stage-20 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/multi_insert_union_src.q.out b/ql/src/test/results/clientpositive/multi_insert_union_src.q.out index 1ff1db5c12..28dd199190 100644 --- a/ql/src/test/results/clientpositive/multi_insert_union_src.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_union_src.q.out @@ -52,6 +52,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-5 + Stage-8 depends on stages: Stage-3, Stage-5 Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 @@ -131,6 +133,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 58 Data size: 608 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -145,6 +167,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -167,6 +203,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 58 Data size: 608 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator diff --git a/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out b/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out index 225f2c301c..b0b0c71670 100644 --- a/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out @@ -67,6 +67,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -118,6 +120,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -132,6 +149,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain FROM T_A a LEFT JOIN T_B b ON a.id = b.id @@ -149,6 +195,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -200,6 +248,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -214,6 +277,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM T_A a LEFT JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -236,8 +328,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -285,6 +381,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -300,6 +411,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -314,6 +440,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -324,9 +486,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM T_A a LEFT JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -349,8 +533,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -398,6 +586,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -413,6 +616,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -427,6 +645,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -437,9 +691,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM T_A a JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -462,8 +738,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -517,6 +797,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -532,6 +827,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -546,6 +856,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -556,9 +902,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM T_A a JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -581,8 +949,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -636,6 +1008,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -651,6 +1038,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -665,6 +1067,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -675,9 +1113,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM T_A a JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -700,8 +1160,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -767,6 +1231,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -782,6 +1261,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -796,6 +1290,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -806,9 +1336,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM T_A a JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -831,8 +1383,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -898,6 +1454,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -913,6 +1484,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -927,6 +1513,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -937,6 +1559,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out b/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out index 7af8c4356d..16e38148a8 100644 --- a/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out +++ b/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out @@ -52,9 +52,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -114,6 +118,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -128,11 +147,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) @@ -157,6 +212,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -168,9 +238,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: EXPLAIN FROM TBL INSERT OVERWRITE TABLE DEST1 SELECT TBL.C1, COUNT(TBL.C2) GROUP BY TBL.C1 @@ -185,9 +277,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -247,6 +343,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -261,11 +372,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) @@ -290,6 +437,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -301,9 +463,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: EXPLAIN FROM TBL INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3 @@ -318,9 +502,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -380,6 +568,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -394,11 +597,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) @@ -423,6 +662,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -434,9 +688,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: EXPLAIN FROM TBL INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3 @@ -451,8 +727,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -492,6 +772,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int) @@ -510,6 +805,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -524,6 +834,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest4 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -534,9 +880,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: EXPLAIN FROM TBL INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3 @@ -553,12 +921,18 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8, Stage-10, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8, Stage-10, Stage-11 + Stage-14 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8, Stage-10, Stage-11 Stage-5 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-1 - Stage-7 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-7 - Stage-8 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-6 + Stage-9 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-9 + Stage-10 depends on stages: Stage-2 + Stage-11 depends on stages: Stage-9 STAGE PLANS: Stage: Stage-3 @@ -634,6 +1008,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -648,11 +1037,54 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) @@ -677,6 +1109,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -688,10 +1135,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator - Stage: Stage-7 + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -720,6 +1189,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -731,6 +1215,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-8 + Stage: Stage-10 Stats-Aggr Operator + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out b/ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out index d01461b51f..4265eb966e 100644 --- a/ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out +++ b/ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -36,7 +38,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -58,6 +59,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(as, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -72,6 +88,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE `insert` SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -103,6 +148,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -121,7 +168,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -143,6 +189,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(as, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -157,6 +218,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE `insert` SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -197,6 +287,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -215,7 +307,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -237,6 +328,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(as, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -251,6 +357,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE `insert` SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/notable_alias1.q.out b/ql/src/test/results/clientpositive/notable_alias1.q.out index 200b31cfaa..3e449590ea 100644 --- a/ql/src/test/results/clientpositive/notable_alias1.q.out +++ b/ql/src/test/results/clientpositive/notable_alias1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: dummy, key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(dummy, 16), compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: dummy, key, value + Column Types: string, int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT '1234', key, count(1) WHERE src.key < 100 group by key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/notable_alias2.q.out b/ql/src/test/results/clientpositive/notable_alias2.q.out index 4df8073cda..49a2b9544f 100644 --- a/ql/src/test/results/clientpositive/notable_alias2.q.out +++ b/ql/src/test/results/clientpositive/notable_alias2.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: dummy, key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(dummy, 16), compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: dummy, key, value + Column Types: string, int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, count(1) WHERE key < 100 group by src.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/orc_merge1.q.out b/ql/src/test/results/clientpositive/orc_merge1.q.out index a83e85bec4..939bf2a6f5 100644 --- a/ql/src/test/results/clientpositive/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/orc_merge1.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -126,6 +168,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -149,6 +192,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -175,6 +252,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -240,6 +324,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -263,6 +348,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -289,6 +408,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge10.q.out b/ql/src/test/results/clientpositive/orc_merge10.q.out index 607aaeb6ae..468b03d7c5 100644 --- a/ql/src/test/results/clientpositive/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/orc_merge10.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -126,6 +168,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -149,6 +192,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -175,6 +252,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -240,6 +324,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -263,6 +348,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -289,6 +408,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge2.q.out b/ql/src/test/results/clientpositive/orc_merge2.q.out index d4c474f9fe..29b3373e10 100644 --- a/ql/src/test/results/clientpositive/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/orc_merge2.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -53,6 +54,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -80,6 +115,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge2a + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge3.q.out b/ql/src/test/results/clientpositive/orc_merge3.q.out index 7bf12c6c28..416cb3c0d6 100644 --- a/ql/src/test/results/clientpositive/orc_merge3.q.out +++ b/ql/src/test/results/clientpositive/orc_merge3.q.out @@ -60,6 +60,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -83,6 +84,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -106,6 +133,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge4.q.out b/ql/src/test/results/clientpositive/orc_merge4.q.out index 828f204157..7c85443506 100644 --- a/ql/src/test/results/clientpositive/orc_merge4.q.out +++ b/ql/src/test/results/clientpositive/orc_merge4.q.out @@ -78,6 +78,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -101,6 +102,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 94000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -124,6 +151,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge5.q.out b/ql/src/test/results/clientpositive/orc_merge5.q.out index e8451869f6..0d965df6b7 100644 --- a/ql/src/test/results/clientpositive/orc_merge5.q.out +++ b/ql/src/test/results/clientpositive/orc_merge5.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -54,6 +55,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -68,6 +95,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -112,6 +146,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -139,6 +174,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -162,6 +223,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge6.q.out b/ql/src/test/results/clientpositive/orc_merge6.q.out index 5ece361bbc..5de3c47bfd 100644 --- a/ql/src/test/results/clientpositive/orc_merge6.q.out +++ b/ql/src/test/results/clientpositive/orc_merge6.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -54,6 +55,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -71,6 +106,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -157,6 +199,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -184,6 +227,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -210,6 +287,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out index a83e85bec4..939bf2a6f5 100644 --- a/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -126,6 +168,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -149,6 +192,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -175,6 +252,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -240,6 +324,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -263,6 +348,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -289,6 +408,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out b/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out index fcf1c68f83..762acd002c 100644 --- a/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,6 +54,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 diff --git a/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out index a27041fd9b..6388dfc322 100644 --- a/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -50,6 +51,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -66,6 +101,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 diff --git a/ql/src/test/results/clientpositive/parallel.q.out b/ql/src/test/results/clientpositive/parallel.q.out index 459105e09a..c85113137b 100644 --- a/ql/src/test/results/clientpositive/parallel.q.out +++ b/ql/src/test/results/clientpositive/parallel.q.out @@ -29,8 +29,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -91,6 +95,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_a + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -104,6 +123,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -118,6 +152,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_a + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_b + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -128,9 +198,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from (select key, value from src group by key, value) s insert overwrite table src_a select s.key, s.value group by s.key, s.value insert overwrite table src_b select s.key, s.value group by s.key, s.value diff --git a/ql/src/test/results/clientpositive/parallel_join1.q.out b/ql/src/test/results/clientpositive/parallel_join1.q.out index 8843661176..43480e1763 100644 --- a/ql/src/test/results/clientpositive/parallel_join1.q.out +++ b/ql/src/test/results/clientpositive/parallel_join1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +77,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +106,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/parquet_analyze.q.out b/ql/src/test/results/clientpositive/parquet_analyze.q.out index d3cdc3f3fc..8627b180ad 100644 --- a/ql/src/test/results/clientpositive/parquet_analyze.q.out +++ b/ql/src/test/results/clientpositive/parquet_analyze.q.out @@ -90,7 +90,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 700 @@ -138,7 +138,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 5952 diff --git a/ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out b/ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out index d15fd81f73..a2011c85c5 100644 --- a/ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out +++ b/ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out @@ -56,7 +56,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone UTC @@ -125,7 +125,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone PST @@ -194,7 +194,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 rawDataSize 1 @@ -262,7 +262,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone CST @@ -368,7 +368,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 2 numRows 2 parquet.mr.int96.write.zone PST @@ -471,7 +471,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 2 numRows 2 parquet.mr.int96.write.zone PST @@ -514,7 +514,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 2 numRows 2 parquet.mr.int96.write.zone GMT+2 diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out index 5d0d170245..503f587a38 100644 --- a/ql/src/test/results/clientpositive/pcr.q.out +++ b/ql/src/test/results/clientpositive/pcr.q.out @@ -91,7 +91,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -138,7 +138,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -280,7 +280,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -327,7 +327,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -374,7 +374,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -554,7 +554,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -601,7 +601,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -745,7 +745,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -792,7 +792,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -938,7 +938,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -985,7 +985,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1032,7 +1032,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1189,7 +1189,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1236,7 +1236,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1283,7 +1283,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1447,7 +1447,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1494,7 +1494,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1621,7 +1621,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1668,7 +1668,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1835,7 +1835,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1882,7 +1882,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1929,7 +1929,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2123,7 +2123,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2170,7 +2170,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2326,7 +2326,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2589,7 +2589,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2636,7 +2636,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2888,7 +2888,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2935,7 +2935,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2982,7 +2982,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3029,7 +3029,7 @@ STAGE PLANS: partition values: ds 2000-04-11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3208,7 +3208,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3255,7 +3255,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3302,7 +3302,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3470,13 +3470,9 @@ STAGE DEPENDENCIES: Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -3522,6 +3518,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -3558,6 +3570,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3569,7 +3608,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3610,6 +3649,35 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3649,8 +3717,14 @@ STAGE PLANS: name: default.pcr_t2 Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Column Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -3832,15 +3906,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -3870,188 +3935,83 @@ STAGE PLANS: name: default.pcr_t3 Stage: Stage-9 - Stats-Aggr Operator -#### A masked pattern was here #### + Column Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true Stage: Stage-10 Map Reduce Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 - Truncated Path -> Alias: + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: from pcr_t1 insert overwrite table pcr_t2 select key, value where ds='2000-04-08' @@ -4092,13 +4052,9 @@ STAGE DEPENDENCIES: Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -4127,7 +4083,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4148,6 +4104,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 2 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (key = 3) (type: boolean) @@ -4167,7 +4139,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4188,6 +4160,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 3 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4199,7 +4198,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4240,6 +4239,35 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -4259,7 +4287,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4279,8 +4307,14 @@ STAGE PLANS: name: default.pcr_t2 Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Column Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -4296,7 +4330,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4326,7 +4360,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4347,7 +4381,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4383,7 +4417,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4413,7 +4447,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4434,7 +4468,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4462,15 +4496,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -4480,7 +4505,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4500,188 +4525,83 @@ STAGE PLANS: name: default.pcr_t3 Stage: Stage-9 - Stats-Aggr Operator -#### A masked pattern was here #### + Column Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true Stage: Stage-10 Map Reduce Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 - Truncated Path -> Alias: + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: from pcr_t1 insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out index 158e88522f..c99a3223e4 100644 --- a/ql/src/test/results/clientpositive/pointlookup2.q.out +++ b/ql/src/test/results/clientpositive/pointlookup2.q.out @@ -136,7 +136,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -183,7 +183,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -334,7 +334,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -557,7 +557,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -604,7 +604,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -821,7 +821,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -868,7 +868,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -913,7 +913,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -934,7 +934,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -1136,7 +1136,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1183,7 +1183,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1230,7 +1230,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1275,7 +1275,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -1296,7 +1296,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out index eb61e17cd9..0057d1df94 100644 --- a/ql/src/test/results/clientpositive/pointlookup3.q.out +++ b/ql/src/test/results/clientpositive/pointlookup3.q.out @@ -91,7 +91,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -139,7 +139,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -267,7 +267,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -418,7 +418,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -642,7 +642,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -690,7 +690,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -908,7 +908,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -956,7 +956,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1004,7 +1004,7 @@ STAGE PLANS: ds1 2000-04-10 ds2 2001-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/pointlookup4.q.out b/ql/src/test/results/clientpositive/pointlookup4.q.out index 8ef5551369..3c9cc60903 100644 --- a/ql/src/test/results/clientpositive/pointlookup4.q.out +++ b/ql/src/test/results/clientpositive/pointlookup4.q.out @@ -91,7 +91,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -139,7 +139,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -267,7 +267,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -315,7 +315,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/ppd_constant_expr.q.out b/ql/src/test/results/clientpositive/ppd_constant_expr.q.out index cbe76549f7..3d96f3cd0e 100644 --- a/ql/src/test/results/clientpositive/ppd_constant_expr.q.out +++ b/ql/src/test/results/clientpositive/ppd_constant_expr.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ppd_constant_expr + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.ppd_constant_expr + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -156,6 +190,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -179,6 +214,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ppd_constant_expr + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -202,6 +263,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.ppd_constant_expr + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/ppd_join5.q.out b/ql/src/test/results/clientpositive/ppd_join5.q.out index 551769c73a..1cd3a7a423 100644 --- a/ql/src/test/results/clientpositive/ppd_join5.q.out +++ b/ql/src/test/results/clientpositive/ppd_join5.q.out @@ -32,7 +32,7 @@ POSTHOOK: Lineage: t1.id1 SIMPLE [] POSTHOOK: Lineage: t1.id2 SIMPLE [] POSTHOOK: Lineage: t2.d SIMPLE [] POSTHOOK: Lineage: t2.id SIMPLE [] -Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select a.*,b.d d1,c.d d2 from t1 a join t2 b on (a.id1 = b.id) @@ -63,35 +63,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan - alias: b + alias: c Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d <= 1) and id is not null) (type: boolean) + predicate: (d <= 1) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: string), d (type: int) - outputColumnNames: _col0, _col1 + expressions: d (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col0 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -104,34 +101,38 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d <= 1) (type: boolean) + predicate: ((d <= 1) and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d (type: int) - outputColumnNames: _col0 + expressions: id (type: string), d (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col0 (type: string) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col3, _col4 + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -148,7 +149,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from ( select a.*,b.d d1,c.d d2 from @@ -183,35 +184,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan - alias: b + alias: c Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d <= 1) and id is not null) (type: boolean) + predicate: (d <= 1) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: string), d (type: int) - outputColumnNames: _col0, _col1 + expressions: d (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col0 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -224,37 +222,41 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d <= 1) (type: boolean) + predicate: ((d <= 1) and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d (type: int) - outputColumnNames: _col0 + expressions: id (type: string), d (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col0 (type: string) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col3, _col4 + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col3 > 1) or (_col4 > 1)) (type: boolean) + predicate: ((_col4 > 1) or (_col2 > 1)) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -271,7 +273,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from ( select a.*,b.d d1,c.d d2 from t1 a join t2 b on (a.id1 = b.id) diff --git a/ql/src/test/results/clientpositive/ppd_multi_insert.q.out b/ql/src/test/results/clientpositive/ppd_multi_insert.q.out index 7e501c71c0..baf1f4519e 100644 --- a/ql/src/test/results/clientpositive/ppd_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/ppd_multi_insert.q.out @@ -48,10 +48,16 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-13 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-14 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-6 depends on stages: Stage-4 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4 Stage-3 depends on stages: Stage-4 STAGE PLANS: @@ -105,6 +111,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -120,6 +141,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -135,6 +171,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE @@ -163,6 +215,49 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -173,9 +268,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Move Operator tables: @@ -189,9 +306,38 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-3 Move Operator files: @@ -1313,10 +1459,16 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-13 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-14 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-6 depends on stages: Stage-4 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4 Stage-3 depends on stages: Stage-4 STAGE PLANS: @@ -1370,6 +1522,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -1385,6 +1552,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -1400,6 +1582,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE @@ -1428,6 +1626,49 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -1438,9 +1679,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Move Operator tables: @@ -1454,9 +1717,38 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-3 Move Operator files: diff --git a/ql/src/test/results/clientpositive/push_or.q.out b/ql/src/test/results/clientpositive/push_or.q.out index dacdc40192..913fc6728b 100644 --- a/ql/src/test/results/clientpositive/push_or.q.out +++ b/ql/src/test/results/clientpositive/push_or.q.out @@ -73,7 +73,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -120,7 +120,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/quote1.q.out b/ql/src/test/results/clientpositive/quote1.q.out index f8592c4e00..f4fdc3f046 100644 --- a/ql/src/test/results/clientpositive/quote1.q.out +++ b/ql/src/test/results/clientpositive/quote1.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string) + outputColumnNames: location, type, table + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(location, 16), compute_stats(type, 16) + keys: table (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -71,6 +106,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: location, type + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out b/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out index 9b29136ba1..b3ae91be2f 100644 --- a/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out +++ b/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -72,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -174,6 +191,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [a] /srcpart/ds=2008-04-08/hr=12 [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -216,6 +262,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: string, string, string, string + Table: default.tmptable + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/rcfile_default_format.q.out b/ql/src/test/results/clientpositive/rcfile_default_format.q.out index 97bc8d2c66..d8b9860674 100644 --- a/ql/src/test/results/clientpositive/rcfile_default_format.q.out +++ b/ql/src/test/results/clientpositive/rcfile_default_format.q.out @@ -121,7 +121,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 1 numRows 500 rawDataSize 1406 diff --git a/ql/src/test/results/clientpositive/rcfile_null_value.q.out b/ql/src/test/results/clientpositive/rcfile_null_value.q.out index f3ab47cfe9..ad0944f5b6 100644 --- a/ql/src/test/results/clientpositive/rcfile_null_value.q.out +++ b/ql/src/test/results/clientpositive/rcfile_null_value.q.out @@ -91,6 +91,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -149,6 +151,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.dest1_rc + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -163,6 +180,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1_rc + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/sample1.q.out b/ql/src/test/results/clientpositive/sample1.q.out index dec9b233b3..f51f0d2271 100644 --- a/ql/src/test/results/clientpositive/sample1.q.out +++ b/ql/src/test/results/clientpositive/sample1.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -74,6 +75,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, dt, hr + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(dt, 16), compute_stats(hr, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +144,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -169,6 +215,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, dt, hr + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample2.q.out b/ql/src/test/results/clientpositive/sample2.q.out index f54c57363a..9619e69ce3 100644 --- a/ql/src/test/results/clientpositive/sample2.q.out +++ b/ql/src/test/results/clientpositive/sample2.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -73,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -168,6 +214,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample4.q.out b/ql/src/test/results/clientpositive/sample4.q.out index 675fda9ef4..5b678f0489 100644 --- a/ql/src/test/results/clientpositive/sample4.q.out +++ b/ql/src/test/results/clientpositive/sample4.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -73,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -168,6 +214,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample5.q.out b/ql/src/test/results/clientpositive/sample5.q.out index 583784e9b7..8947da0ed7 100644 --- a/ql/src/test/results/clientpositive/sample5.q.out +++ b/ql/src/test/results/clientpositive/sample5.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -74,6 +75,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +144,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -169,6 +215,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample6.q.out b/ql/src/test/results/clientpositive/sample6.q.out index 36e6906785..fd434afab2 100644 --- a/ql/src/test/results/clientpositive/sample6.q.out +++ b/ql/src/test/results/clientpositive/sample6.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -73,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -168,6 +214,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample7.q.out b/ql/src/test/results/clientpositive/sample7.q.out index f0d9088174..880c0887db 100644 --- a/ql/src/test/results/clientpositive/sample7.q.out +++ b/ql/src/test/results/clientpositive/sample7.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -74,6 +75,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +144,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -169,6 +215,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/skewjoin.q.out b/ql/src/test/results/clientpositive/skewjoin.q.out index cd7d6fa053..00f645adc1 100644 --- a/ql/src/test/results/clientpositive/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/skewjoin.q.out @@ -80,11 +80,13 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-0 - Stage-6 - Stage-4 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-1 , consists of Stage-7, Stage-0, Stage-3 + Stage-7 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -143,11 +145,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-6 Conditional Operator - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: 1 @@ -161,7 +178,7 @@ STAGE PLANS: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -184,6 +201,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -200,6 +232,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out index b53e6704cc..bda35116bb 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out @@ -50,6 +50,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -93,7 +95,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -172,6 +174,32 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -205,6 +233,86 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -1827,7 +1935,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -1936,7 +2044,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out index 9928a60095..907280384a 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out @@ -62,6 +62,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -129,7 +130,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -207,6 +208,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -254,6 +263,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -399,6 +409,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out index 49ff6355b3..3bc00c8973 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out @@ -112,7 +112,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -135,7 +135,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -292,7 +292,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -315,7 +315,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out index 6c411716e7..76fde24987 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out @@ -42,6 +42,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -73,6 +75,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +107,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -157,6 +211,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -193,6 +248,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -273,6 +335,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -304,6 +368,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '2' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -320,3 +400,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2' (type: string) + sort order: + + Map-reduce partition columns: '2' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '2' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out index c0fdfd38d2..0ceb6a5cfa 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out @@ -42,6 +42,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -78,6 +79,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -108,6 +116,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -139,6 +149,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -155,6 +181,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -185,6 +247,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -215,6 +279,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -231,6 +311,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -261,6 +377,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -292,6 +410,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -308,6 +442,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -338,6 +508,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -369,6 +541,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -385,6 +573,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -415,6 +639,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -445,6 +671,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -461,3 +703,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_22.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_22.q.out index 36e879236f..8bd0fd64d0 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_22.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_22.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 PREHOOK: type: QUERY @@ -180,6 +188,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -214,6 +223,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out index 82f5804eea..20d51db251 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out @@ -620,6 +620,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -648,6 +649,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -671,6 +693,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out index 40c0ec5b59..5754a74478 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out @@ -101,7 +101,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 1 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out index 0740df3079..5dc2426fe4 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out @@ -75,7 +75,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -184,7 +184,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out index fc5066c0f7..28501ab1d0 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out @@ -111,7 +111,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -158,7 +158,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -259,7 +259,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -308,7 +308,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out b/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out index 0d5ba01960..f0cf74ca81 100644 --- a/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out +++ b/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out @@ -89,7 +89,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 @@ -241,7 +241,7 @@ Database: default Table: src_orc_merge_test_part_stat #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 @@ -290,7 +290,7 @@ Database: default Table: src_orc_merge_test_part_stat #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 diff --git a/ql/src/test/results/clientpositive/spark/auto_join1.q.out b/ql/src/test/results/clientpositive/spark/auto_join1.q.out index d9cd7700cc..74ec63d273 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join1.q.out @@ -19,13 +19,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -46,6 +47,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,7 +71,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) @@ -82,8 +85,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,6 +128,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join14.q.out b/ql/src/test/results/clientpositive/spark/auto_join14.q.out index 82deefea73..fa5c433fd3 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join14.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join14.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -46,6 +47,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -82,8 +85,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,6 +128,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join17.q.out b/ql/src/test/results/clientpositive/spark/auto_join17.q.out index 6d63fa68a3..f818416d3b 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join17.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join17.q.out @@ -19,13 +19,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -46,6 +47,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,7 +71,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) @@ -82,8 +85,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,6 +128,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join19.q.out b/ql/src/test/results/clientpositive/spark/auto_join19.q.out index 88ef3f1981..fd4f1041c8 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join19.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join19.q.out @@ -21,13 +21,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -48,6 +49,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -70,7 +73,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col4 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) @@ -84,8 +87,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -100,6 +130,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') diff --git a/ql/src/test/results/clientpositive/spark/auto_join2.q.out b/ql/src/test/results/clientpositive/spark/auto_join2.q.out index e32abba2f4..5a98744b76 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join2.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join2.q.out @@ -19,13 +19,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -43,7 +44,7 @@ STAGE PLANS: 1 _col0 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: src3 @@ -64,6 +65,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -86,7 +89,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -96,7 +99,7 @@ STAGE PLANS: 1 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col3 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) @@ -110,8 +113,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -126,6 +156,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j2 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join26.q.out b/ql/src/test/results/clientpositive/spark/auto_join26.q.out index a0deaabd87..299dbcc7ca 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join26.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join26.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -48,6 +49,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -106,6 +108,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +149,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join3.q.out b/ql/src/test/results/clientpositive/spark/auto_join3.q.out index a17cc1a0a1..31d1017307 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join3.q.out @@ -19,13 +19,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -44,7 +45,7 @@ STAGE PLANS: 2 _col0 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: src3 @@ -66,6 +67,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -90,8 +93,8 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 2 - 2 Map 3 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) @@ -105,8 +108,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -121,6 +151,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join4.q.out b/ql/src/test/results/clientpositive/spark/auto_join4.q.out index b212b54bf1..facd25568a 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join4.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join4.q.out @@ -41,13 +41,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -68,6 +69,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -90,7 +93,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) @@ -104,8 +107,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +150,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/auto_join5.q.out b/ql/src/test/results/clientpositive/spark/auto_join5.q.out index 422623891a..83faac6d8d 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join5.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join5.q.out @@ -41,6 +41,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -68,6 +69,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -104,8 +107,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +150,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/auto_join6.q.out b/ql/src/test/results/clientpositive/spark/auto_join6.q.out index 4f1e6316b8..2033b0594b 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join6.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join6.q.out @@ -40,12 +40,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +68,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -106,6 +108,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +149,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/auto_join7.q.out b/ql/src/test/results/clientpositive/spark/auto_join7.q.out index 0e6e2f5b73..48d59c9a94 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join7.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join7.q.out @@ -50,12 +50,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -76,7 +78,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -94,7 +96,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 4 + Map 5 Map Operator Tree: TableScan alias: src3 @@ -136,6 +138,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -150,6 +179,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/auto_join8.q.out b/ql/src/test/results/clientpositive/spark/auto_join8.q.out index 5fdc5dfd05..70f57b6beb 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join8.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join8.q.out @@ -41,13 +41,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -68,6 +69,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -90,7 +93,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col2 is null (type: boolean) @@ -107,8 +110,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -123,6 +153,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/auto_join9.q.out b/ql/src/test/results/clientpositive/spark/auto_join9.q.out index b2ed51c108..0187f424a2 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join9.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join9.q.out @@ -19,13 +19,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -46,6 +47,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,7 +71,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col4 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) @@ -82,8 +85,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,6 +128,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out index 4527b35ae1..7cc0f70c83 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out @@ -126,7 +126,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -147,7 +147,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -199,7 +199,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -220,7 +220,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -272,7 +272,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -293,7 +293,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -345,7 +345,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -366,7 +366,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -418,7 +418,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid @@ -439,7 +439,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid diff --git a/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out index 602ccb21da..1dd6b5d061 100644 --- a/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out @@ -353,7 +353,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1327,15 +1327,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: a @@ -1371,6 +1376,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) File Output Operator compressed: false Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE @@ -1379,6 +1397,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1393,6 +1476,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -1525,6 +1622,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -1532,10 +1631,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: a @@ -1564,6 +1665,39 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE @@ -1582,6 +1716,20 @@ STAGE PLANS: Reducer 2 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial @@ -1599,6 +1747,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1613,6 +1788,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out index cd4b83a2a1..b303f7c33b 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out @@ -70,15 +70,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: a @@ -110,6 +115,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -122,6 +140,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -136,6 +219,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -255,15 +352,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: a @@ -295,6 +397,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -307,6 +422,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -321,6 +501,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -438,9 +632,12 @@ INSERT OVERWRITE TABLE dest2 select value1, value2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4 + Stage-8 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -449,7 +646,30 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-6 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: b @@ -470,9 +690,12 @@ STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: a @@ -492,7 +715,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -506,6 +729,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -520,6 +756,75 @@ STAGE PLANS: name: default.dest2 Local Work: Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Local Work: + Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -534,6 +839,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -581,28 +900,6 @@ POSTHOOK: query: select * from dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -2 2 -4 4 -5 5 -5 5 -5 5 -5 5 -5 5 -5 5 -5 5 -5 5 -5 5 -8 8 -9 9 PREHOOK: query: select * from dest2 PREHOOK: type: QUERY PREHOOK: Input: default@dest2 @@ -611,25 +908,3 @@ POSTHOOK: query: select * from dest2 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_2 val_2 -val_4 val_4 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_8 val_8 -val_9 val_9 diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out index de08021150..f3d4b7e273 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out @@ -82,8 +82,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -98,20 +99,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -128,6 +122,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -136,6 +147,21 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -146,7 +172,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -192,8 +218,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -208,20 +235,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: d @@ -238,6 +258,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -246,6 +283,21 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -256,7 +308,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -302,8 +354,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -318,20 +371,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: h @@ -348,6 +394,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -356,6 +419,21 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -366,7 +444,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -412,8 +490,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -428,20 +507,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -458,6 +530,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -466,6 +555,21 @@ STAGE PLANS: keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -476,7 +580,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -601,8 +705,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -617,20 +722,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -647,6 +745,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -655,6 +770,21 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -665,7 +795,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -711,8 +841,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -727,20 +858,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -757,6 +881,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -765,6 +906,21 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -775,7 +931,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -821,8 +977,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -837,20 +994,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -867,6 +1017,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -875,6 +1042,21 @@ STAGE PLANS: keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -885,7 +1067,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1010,8 +1192,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1026,20 +1209,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -1056,6 +1232,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -1064,6 +1257,21 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1074,7 +1282,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out index cdb69db270..d15d9c14f7 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out @@ -504,7 +504,7 @@ STAGE PLANS: 1 Reducer 4 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2128,7 +2128,7 @@ STAGE PLANS: 1 Reducer 5 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/bucket2.q.out b/ql/src/test/results/clientpositive/spark/bucket2.q.out index 90c9e5469d..8c3938b5b9 100644 --- a/ql/src/test/results/clientpositive/spark/bucket2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket2.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -135,6 +136,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -169,6 +205,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket2_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/bucket3.q.out b/ql/src/test/results/clientpositive/spark/bucket3.q.out index 078460f9b9..4c9fe0232c 100644 --- a/ql/src/test/results/clientpositive/spark/bucket3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket3.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -133,6 +135,60 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -166,6 +222,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + Is Table Level Stats: false + PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/bucket4.q.out b/ql/src/test/results/clientpositive/spark/bucket4.q.out index 13e21b6610..f008c89872 100644 --- a/ql/src/test/results/clientpositive/spark/bucket4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket4.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -137,6 +138,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -172,6 +208,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket4_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket4_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/bucket5.q.out b/ql/src/test/results/clientpositive/spark/bucket5.q.out index dd24db8e3e..2f613ae8c7 100644 --- a/ql/src/test/results/clientpositive/spark/bucket5.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket5.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,11 +37,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -109,7 +113,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -220,9 +224,55 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: Select Operator expressions: UDFToInteger(KEY.reducesinkkey0) (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 @@ -259,6 +309,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 5 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -294,6 +390,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucketed_table + Is Table Level Stats: true + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.unbucketed_table + Is Table Level Stats: true + Stage: Stage-1 Move Operator tables: @@ -362,7 +474,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} SORTBUCKETCOLSPREFIX TRUE numFiles 2 numRows 500 diff --git a/ql/src/test/results/clientpositive/spark/bucket6.q.out b/ql/src/test/results/clientpositive/spark/bucket6.q.out index d5d53d303d..c93bf3fea8 100644 --- a/ql/src/test/results/clientpositive/spark/bucket6.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket6.q.out @@ -16,12 +16,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -53,6 +55,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_bucket + PREHOOK: query: insert into table src_bucket select key,value from srcpart PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out index 72297343ca..10459d3399 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out @@ -121,13 +121,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -209,6 +210,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -233,7 +236,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -273,6 +276,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -333,6 +352,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -366,6 +415,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -428,13 +485,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -511,6 +569,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -535,7 +595,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -553,7 +613,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -574,6 +634,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -629,6 +705,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -639,7 +745,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -662,6 +768,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out index 4592f214e0..113b7c975b 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out @@ -105,13 +105,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -193,6 +194,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -217,7 +220,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -257,6 +260,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -317,6 +336,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -350,6 +399,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -412,13 +469,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -500,6 +558,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -524,7 +584,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -543,7 +603,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -564,6 +624,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -624,6 +700,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -634,7 +740,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -657,6 +763,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out index add6ac48d2..b8034d25f3 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out @@ -105,6 +105,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -193,6 +194,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -257,6 +260,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -317,6 +336,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -350,6 +399,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -412,6 +469,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -495,6 +553,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -537,7 +597,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -558,6 +618,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -613,6 +689,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -623,7 +729,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -646,6 +752,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out index bd67ee3b9a..d3c9893d31 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out @@ -111,7 +111,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -134,7 +134,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -187,7 +187,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -210,7 +210,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -317,7 +317,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -340,7 +340,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -490,7 +490,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -513,7 +513,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -566,7 +566,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -589,7 +589,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -690,7 +690,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -713,7 +713,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 08d115d105..a5d8970714 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -350,7 +350,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -390,10 +390,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: b @@ -411,31 +411,35 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col1 input vertices: - 0 Map 1 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -503,36 +507,36 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + alias: d + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) @@ -543,10 +547,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: b @@ -564,31 +568,35 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col1 input vertices: - 0 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -764,7 +772,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: b @@ -778,8 +786,8 @@ STAGE PLANS: Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Local Work: Map Reduce Local Work @@ -787,7 +795,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: tab @@ -809,10 +817,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Map 2 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: tab_part @@ -832,7 +840,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), substr(_col2, 5) (type: string) @@ -852,7 +860,7 @@ STAGE PLANS: value expressions: _col1 (type: double) Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -870,14 +878,14 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 input vertices: - 1 Map 4 + 0 Map 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + expressions: _col0 (type: int), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -915,7 +923,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -937,10 +945,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Map 2 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: x @@ -960,7 +968,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), substr(_col1, 5) (type: string) @@ -980,7 +988,7 @@ STAGE PLANS: value expressions: _col1 (type: double) Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -996,14 +1004,14 @@ STAGE PLANS: Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: b @@ -1019,14 +1027,14 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3 input vertices: - 0 Reducer 2 + 1 Reducer 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1066,14 +1074,14 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -1085,14 +1093,14 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -1126,13 +1134,13 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col4 input vertices: 1 Map 2 2 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1671,19 +1679,19 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -1706,28 +1714,29 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reducer 2 Local Work: Map Reduce Local Work @@ -1736,22 +1745,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 input vertices: 1 Map 4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: int) + expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out index 729da5e0f1..b6e2403c2e 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out @@ -125,19 +125,19 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -160,28 +160,29 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reducer 2 Local Work: Map Reduce Local Work @@ -190,22 +191,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 input vertices: 1 Map 4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: int) + expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out index 5943fdfc0c..23ea0233e9 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out @@ -378,12 +378,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -460,7 +462,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -583,6 +585,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -616,6 +664,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b @@ -733,12 +789,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -815,7 +873,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -917,7 +975,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -938,6 +996,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -948,7 +1052,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -971,6 +1075,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out index 9e44d1a68d..72a093e3bb 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out @@ -110,7 +110,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -205,7 +205,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name value column.name.delimiter , @@ -254,7 +254,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -409,7 +409,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -510,7 +510,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -670,7 +670,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -771,7 +771,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -931,7 +931,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -1032,7 +1032,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out index ed95995253..24a1b51bee 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out @@ -104,12 +104,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -188,7 +190,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -311,6 +313,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -344,6 +392,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -465,12 +521,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -549,7 +607,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -651,7 +709,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -672,6 +730,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -682,7 +786,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -705,6 +809,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -843,12 +955,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -927,7 +1041,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1078,7 +1192,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1099,6 +1213,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1109,7 +1269,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1132,6 +1292,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out index 14139bb448..81c0709025 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out @@ -128,12 +128,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -212,7 +214,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -335,6 +337,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -368,6 +416,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b @@ -489,12 +545,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -573,7 +631,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -675,7 +733,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -696,6 +754,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -706,7 +810,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -729,6 +833,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out index cbc2110ff2..4b435095ea 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out @@ -128,12 +128,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -210,7 +212,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -331,6 +333,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -364,6 +412,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin b @@ -473,12 +529,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -555,7 +613,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -655,7 +713,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -676,6 +734,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -686,7 +790,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -709,6 +813,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin b diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out index a9415135fb..4a7a95d243 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out @@ -179,6 +179,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -261,6 +262,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -321,6 +324,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -430,6 +449,36 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] /srcbucket_mapjoin_part/ds=2008-04-09 [b] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -463,6 +512,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b @@ -585,6 +642,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -667,6 +725,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -706,7 +766,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -727,6 +787,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -836,6 +912,36 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -846,7 +952,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -869,6 +975,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out index 3e74c217e7..0ecccfe140 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out @@ -80,13 +80,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -159,6 +160,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -179,7 +182,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -218,6 +221,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -271,6 +290,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -304,3 +353,11 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out index a3acd5dd74..db7ec7cc0f 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out @@ -89,13 +89,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -222,6 +223,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -242,7 +245,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -282,6 +285,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -340,6 +359,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -373,3 +422,11 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out index 81a064b2b7..e0706eae86 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out @@ -93,6 +93,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -122,6 +123,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -172,6 +174,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -188,6 +225,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -269,6 +313,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -298,6 +343,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -348,6 +394,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -364,6 +445,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -469,6 +557,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -498,6 +587,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -548,6 +638,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -564,6 +689,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -675,6 +807,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -704,6 +837,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -754,6 +888,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -770,6 +939,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM @@ -863,6 +1039,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -892,6 +1069,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -942,6 +1120,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -958,6 +1171,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.v1, b.v2) FROM @@ -1051,6 +1271,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1080,6 +1301,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -1130,6 +1352,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1146,6 +1403,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key+a.key, concat(a.value, b.value) FROM diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out index 4245aa1d99..f23579dafb 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out @@ -69,6 +69,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -94,6 +95,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -140,6 +142,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, key2, value + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -156,6 +193,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -256,6 +300,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -285,6 +330,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -331,6 +377,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -347,6 +428,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a JOIN test_table2 b diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out index 5e4e5ef8ad..66fc9ac830 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out @@ -71,6 +71,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -100,6 +101,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -150,6 +152,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -166,6 +203,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key2, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -254,6 +298,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -283,6 +328,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -333,6 +379,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -349,6 +430,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq1.key, subq1.key2, subq1.value from ( @@ -437,6 +525,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -466,6 +555,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -516,6 +606,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -532,6 +657,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq1.key2, subq1.key, subq1.value from @@ -555,6 +687,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -584,6 +717,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -634,6 +768,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -650,6 +819,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.key, subq2.key2, subq2.value from @@ -679,6 +855,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -708,6 +885,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -758,6 +936,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -774,6 +987,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.key, subq2.key2, subq2.value from ( @@ -880,6 +1100,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -909,6 +1130,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -959,6 +1181,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -975,6 +1232,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.k2, subq2.k1, subq2.value from ( @@ -1091,6 +1355,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1120,6 +1385,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -1170,6 +1436,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1186,3 +1487,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table4 + diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out index 7c23da7cc2..aab48ebc64 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out @@ -71,6 +71,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -100,6 +101,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -150,6 +152,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -166,6 +203,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -251,6 +295,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -280,6 +325,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -330,6 +376,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -346,6 +427,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM @@ -437,6 +525,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -466,6 +555,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -516,6 +606,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -532,6 +657,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out index 8263b6f681..2c0be3fe25 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out @@ -69,6 +69,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -98,6 +99,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -148,6 +150,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -164,6 +201,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, b.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -246,6 +290,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -275,6 +320,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -325,6 +371,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -341,6 +422,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT b.key, a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b diff --git a/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out b/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out index 8693026c71..41ed32af7c 100644 --- a/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out +++ b/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -135,6 +136,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -169,6 +205,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket2_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out index 38ea9bdcab..69f807146b 100644 --- a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out +++ b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out @@ -176,6 +176,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -183,11 +185,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 31) - Reducer 3 <- Map 4 (GROUP, 31) + Reducer 2 <- Map 6 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 31) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -228,9 +232,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -248,6 +279,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -262,6 +320,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -335,14 +407,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -361,7 +435,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -377,7 +451,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 5 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -416,7 +490,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -439,7 +540,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -462,6 +576,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -476,6 +603,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: SELECT * FROM tmptable x SORT BY x.key PREHOOK: type: QUERY PREHOOK: Input: default@tmptable diff --git a/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out index 52aa055cf9..8c680bf73c 100644 --- a/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out @@ -73,7 +73,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -148,7 +148,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -222,7 +222,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/spark/groupby10.q.out b/ql/src/test/results/clientpositive/spark/groupby10.q.out index b572995b32..c646733438 100644 --- a/ql/src/test/results/clientpositive/spark/groupby10.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby10.q.out @@ -44,6 +44,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -51,13 +53,17 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: input @@ -105,9 +111,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator aggregations: sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: int) mode: partial1 @@ -119,7 +159,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double) - Reducer 5 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -139,6 +179,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -153,6 +227,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -256,6 +344,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -263,13 +353,17 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: input @@ -317,9 +411,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator aggregations: sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: int) mode: partial1 @@ -331,7 +459,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double) - Reducer 5 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -351,6 +479,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -365,6 +527,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -468,6 +644,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -475,7 +653,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -492,7 +673,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: int) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE @@ -514,6 +723,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) keys: KEY._col0 (type: int) @@ -532,6 +749,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) Stage: Stage-0 Move Operator @@ -546,6 +785,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby11.q.out b/ql/src/test/results/clientpositive/spark/groupby11.q.out index a0f99c4cfa..2e01db8e46 100644 --- a/ql/src/test/results/clientpositive/spark/groupby11.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby11.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -39,13 +41,17 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 2) Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 6 <- Map 11 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 9 <- Reducer 8 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: src @@ -59,7 +65,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 11 Map Operator Tree: TableScan alias: src @@ -107,9 +113,52 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: partial1 @@ -121,7 +170,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 5 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1) @@ -141,6 +190,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -157,6 +249,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby1_map.q.out b/ql/src/test/results/clientpositive/spark/groupby1_map.q.out index b414aa62a3..9556c8235f 100644 --- a/ql/src/test/results/clientpositive/spark/groupby1_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby1_map.q.out @@ -16,12 +16,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,6 +67,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -79,6 +108,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out index a01cee1d49..b9f06f4ab0 100644 --- a/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out @@ -16,12 +16,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,6 +67,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -79,6 +108,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out index f7b7f7a185..22d008473a 100644 --- a/ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -23,6 +24,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -80,6 +82,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +123,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out index 1b7e53b3eb..55270c43eb 100644 --- a/ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out @@ -16,12 +16,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -59,6 +61,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: double) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -73,6 +97,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest_g1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby2.q.out b/ql/src/test/results/clientpositive/spark/groupby2.q.out index a5cd0e6066..bea7096412 100644 --- a/ql/src/test/results/clientpositive/spark/groupby2.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -25,6 +26,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -75,6 +78,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby2_map.q.out b/ql/src/test/results/clientpositive/spark/groupby2_map.q.out index d2b69af851..5f76784a0d 100644 --- a/ql/src/test/results/clientpositive/spark/groupby2_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2_map.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -67,6 +69,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -81,6 +110,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out index 4ad056887d..927e84adfb 100644 --- a/ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -67,6 +69,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -81,6 +110,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY @@ -126,12 +162,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -175,6 +213,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -189,6 +254,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out index f4a567ea9b..cac24f92e2 100644 --- a/ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -25,6 +26,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -82,6 +84,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -96,6 +125,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out index 8ecf769b43..507fea1f6b 100644 --- a/ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,6 +62,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -74,6 +98,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out index 3ede0fc755..deabca8ad7 100644 --- a/ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -61,6 +63,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -75,6 +99,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby3.q.out b/ql/src/test/results/clientpositive/spark/groupby3.q.out index 23871ba526..9b184ece4c 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -43,6 +44,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,6 +70,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reducer 3 @@ -89,6 +92,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4452 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -103,6 +133,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map.q.out index 71f8dc0191..465ccce291 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_map.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -83,6 +84,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -97,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out index 47ef5cb6ae..c1ee7c7ca7 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -87,6 +88,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -101,6 +122,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out index 7cfca81d71..0783098a7e 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -96,6 +97,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -110,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out index b2993a6e85..046325f10d 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -76,6 +77,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -90,6 +107,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out index d152a07c77..5c998d0901 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -80,6 +81,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5448 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5448 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +111,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/spark/groupby4.q.out b/ql/src/test/results/clientpositive/spark/groupby4.q.out index 3ad01d0d70..1f2cf2ed77 100644 --- a/ql/src/test/results/clientpositive/spark/groupby4.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby4.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -25,6 +26,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,6 +71,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -82,6 +119,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby4_map.q.out b/ql/src/test/results/clientpositive/spark/groupby4_map.q.out index 39536bb633..61cdbb70e7 100644 --- a/ql/src/test/results/clientpositive/spark/groupby4_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby4_map.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -59,6 +60,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -73,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out index 535e770496..84f79d981c 100644 --- a/ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -59,6 +60,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -73,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out index 04f58fa671..6f293bd36a 100644 --- a/ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -55,6 +57,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby5.q.out b/ql/src/test/results/clientpositive/spark/groupby5.q.out index d292f747e6..a85848247c 100644 --- a/ql/src/test/results/clientpositive/spark/groupby5.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby5.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -29,6 +30,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -80,6 +83,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) FROM src diff --git a/ql/src/test/results/clientpositive/spark/groupby5_map.q.out b/ql/src/test/results/clientpositive/spark/groupby5_map.q.out index add30941b2..7ce80d9db6 100644 --- a/ql/src/test/results/clientpositive/spark/groupby5_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby5_map.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -61,6 +62,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -75,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out index 924ef5dbc7..973b9329dd 100644 --- a/ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -61,6 +62,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -75,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out index 300ccb64c3..e42bc98810 100644 --- a/ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out @@ -22,12 +22,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,6 +67,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -79,6 +103,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) FROM src diff --git a/ql/src/test/results/clientpositive/spark/groupby6.q.out b/ql/src/test/results/clientpositive/spark/groupby6.q.out index 4f406d7d07..27878df8ac 100644 --- a/ql/src/test/results/clientpositive/spark/groupby6.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby6.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -25,6 +26,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,6 +71,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -82,6 +119,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby6_map.q.out b/ql/src/test/results/clientpositive/spark/groupby6_map.q.out index 03f68c63b2..3c83cebbdd 100644 --- a/ql/src/test/results/clientpositive/spark/groupby6_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby6_map.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,6 +62,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -74,6 +103,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out index 606b5d5272..0df968dd8c 100644 --- a/ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -25,6 +26,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -73,6 +75,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -87,6 +116,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out index eb72f011b2..8e76e98dd4 100644 --- a/ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -55,6 +57,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby7_map.q.out b/ql/src/test/results/clientpositive/spark/groupby7_map.q.out index 31daab8acc..e732ee2528 100644 --- a/ql/src/test/results/clientpositive/spark/groupby7_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby7_map.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,11 +37,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 31) - Reducer 3 <- Map 4 (GROUP, 31) + Reducer 2 <- Map 6 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 31) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -80,9 +84,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -100,6 +131,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -114,6 +172,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out index 625a7374c9..30f9af3fde 100644 --- a/ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,7 +37,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -53,7 +58,35 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: substr(value, 5) (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -75,6 +108,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -93,6 +139,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -107,6 +180,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out index 4fbfd30ec6..9bcf4fc1a3 100644 --- a/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,12 +37,14 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 5 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Reducer 2 (GROUP, 31) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 8 Map Operator Tree: TableScan alias: src @@ -95,7 +99,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Reducer 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -115,6 +146,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -129,6 +187,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out index a26247af8d..7158f486d4 100644 --- a/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,11 +37,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 31) - Reducer 3 <- Map 4 (GROUP, 31) + Reducer 2 <- Map 6 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 31) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -74,9 +78,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: complete @@ -94,6 +120,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -108,6 +156,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out index 2dce3016b3..150de1f431 100644 --- a/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,9 +37,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Reducer 5 (SORT, 1) - Reducer 4 <- Reducer 5 (SORT, 1) - Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 7 (SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Reducer 7 (SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 7 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -76,8 +80,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reducer 4 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1 @@ -97,7 +123,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -127,6 +175,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby8.q.out b/ql/src/test/results/clientpositive/spark/groupby8.q.out index f7f3279e6b..79bb6abe39 100644 --- a/ql/src/test/results/clientpositive/spark/groupby8.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby8.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,12 +37,16 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 5 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 7 <- Reducer 2 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: src @@ -88,9 +94,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 5 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: final @@ -108,6 +148,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -122,6 +196,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -801,6 +889,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -808,12 +898,16 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 5 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 7 <- Reducer 2 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: src @@ -861,9 +955,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 5 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: final @@ -881,6 +1009,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -895,6 +1057,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby8_map.q.out b/ql/src/test/results/clientpositive/spark/groupby8_map.q.out index 288ca3f3b2..fa911bc03a 100644 --- a/ql/src/test/results/clientpositive/spark/groupby8_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby8_map.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,7 +37,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -52,7 +57,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -74,6 +107,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -92,6 +138,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -106,6 +179,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out index 9e76fd57aa..0b3520cc0c 100644 --- a/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,12 +37,14 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 5 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Reducer 2 (GROUP, 31) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 8 Map Operator Tree: TableScan alias: src @@ -94,7 +98,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Reducer 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -114,6 +145,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -128,6 +186,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out index 288ca3f3b2..c4127093ae 100644 --- a/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,7 +37,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -52,7 +57,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -74,6 +107,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -92,6 +133,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Stage: Stage-0 Move Operator @@ -106,6 +169,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby9.q.out b/ql/src/test/results/clientpositive/spark/groupby9.q.out index d59d8cf706..4f2e65568c 100644 --- a/ql/src/test/results/clientpositive/spark/groupby9.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby9.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,11 +37,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -59,7 +63,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -99,9 +103,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -119,6 +150,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -133,6 +191,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -813,6 +885,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -820,11 +894,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -844,7 +920,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -884,9 +960,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -904,6 +1007,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -918,6 +1048,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -1598,6 +1742,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -1605,11 +1751,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -1629,7 +1777,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -1669,9 +1817,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -1689,6 +1864,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1703,6 +1905,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -2383,6 +2599,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -2390,11 +2608,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 2) - Reducer 3 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 6 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -2415,7 +2635,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -2456,9 +2676,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -2476,6 +2723,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -2490,6 +2764,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -3170,6 +3458,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -3177,11 +3467,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -3201,7 +3493,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -3241,9 +3533,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -3261,6 +3580,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -3275,6 +3621,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out b/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out index cace09614b..43ab17aa8f 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out @@ -567,6 +567,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -574,13 +576,15 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 8 Map Operator Tree: TableScan alias: t1 @@ -601,7 +605,7 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Map 7 + Map 9 Map Operator Tree: TableScan alias: t1 @@ -657,9 +661,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials @@ -671,7 +702,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 5 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -692,6 +723,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -706,6 +764,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out index e5c0402403..59a22c2897 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out @@ -24,12 +24,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -204,6 +206,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -237,6 +285,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out index 9895aec727..836208e130 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out @@ -24,12 +24,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -204,6 +206,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -237,6 +285,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out index bf4132a974..c595a48e1d 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,11 +37,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -59,7 +63,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -99,9 +103,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: double) mode: mergepartial @@ -119,6 +150,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -133,6 +191,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out index c16df1b667..fec6003570 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out @@ -54,6 +54,9 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 Stage-4 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6 Stage-0 depends on stages: Stage-3 Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 @@ -63,7 +66,12 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 6 (GROUP, 1) + Reducer 4 <- Reducer 7 (GROUP, 1) + Reducer 5 <- Reducer 8 (GROUP, 1) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -81,7 +89,49 @@ STAGE PLANS: Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -103,6 +153,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -145,6 +208,66 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Reducer 7 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 8 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Stage: Stage-2 Move Operator @@ -159,6 +282,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 + Stage: Stage-0 Move Operator tables: @@ -282,6 +426,11 @@ STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 Stage-6 depends on stages: Stage-2 + Stage-11 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-13 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-14 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-15 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 Stage-0 depends on stages: Stage-5 Stage-7 depends on stages: Stage-0 Stage-1 depends on stages: Stage-5 @@ -295,12 +444,20 @@ STAGE PLANS: Stage: Stage-5 Spark Edges: - Reducer 2 <- Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 12 <- Map 10 (PARTITION-LEVEL SORT, 2) + Reducer 13 <- Map 10 (PARTITION-LEVEL SORT, 2) + Reducer 14 <- Map 10 (PARTITION-LEVEL SORT, 2) + Reducer 15 <- Map 11 (PARTITION-LEVEL SORT, 2) + Reducer 16 <- Map 11 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 12 (GROUP, 1) + Reducer 4 <- Reducer 13 (GROUP, 1) + Reducer 5 <- Reducer 14 (GROUP, 1) + Reducer 7 <- Reducer 15 (SORT, 1) + Reducer 8 <- Reducer 7 (GROUP, 1) + Reducer 9 <- Reducer 16 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 Map Operator Tree: TableScan alias: src @@ -315,7 +472,7 @@ STAGE PLANS: Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 6 + Map 11 Map Operator Tree: TableScan alias: src @@ -330,7 +487,7 @@ STAGE PLANS: Map-reduce partition columns: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 12 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -352,6 +509,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -394,7 +564,67 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Reducer 3 + Reducer 13 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 14 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 15 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -412,7 +642,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint) Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) @@ -435,8 +664,80 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 + Reducer 16 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: string), VALUE._col1 (type: double), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -456,6 +757,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -470,6 +812,41 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h2 + + Stage: Stage-15 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h3 + Stage: Stage-0 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out index 37deb9336f..0913b1405e 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,7 +37,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -52,7 +57,35 @@ STAGE PLANS: Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE @@ -77,6 +110,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +144,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Stage: Stage-0 Move Operator @@ -112,6 +188,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_g2 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, int + Table: default.dest_g3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out index b6127475ea..b73e139122 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -47,7 +49,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -64,7 +69,35 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -89,6 +122,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -110,6 +156,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -124,6 +200,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + Stage: Stage-1 Move Operator tables: @@ -212,6 +302,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -219,7 +311,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -236,7 +331,35 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -261,6 +384,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -282,6 +418,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -296,6 +462,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + Stage: Stage-1 Move Operator tables: @@ -384,6 +564,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -391,7 +573,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -408,7 +593,35 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -433,6 +646,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -454,6 +680,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -468,6 +724,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + Stage: Stage-1 Move Operator tables: @@ -556,6 +826,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -563,7 +835,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -580,7 +855,35 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -605,6 +908,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -626,6 +942,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -640,6 +986,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_position.q.out b/ql/src/test/results/clientpositive/spark/groupby_position.q.out index 49eb3edd2f..42e9088900 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_position.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,11 +37,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -58,7 +62,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -97,9 +101,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -117,6 +148,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -131,6 +189,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 + Stage: Stage-1 Move Operator tables: @@ -219,6 +291,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -226,11 +300,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -249,7 +325,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -288,9 +364,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -308,6 +411,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -322,6 +452,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out index bc14fe054a..8dfe94349c 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out @@ -24,12 +24,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -197,6 +199,47 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -230,6 +273,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out index a8eafaf867..fb7aee663f 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out @@ -24,12 +24,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -197,6 +199,47 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -230,6 +273,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) @@ -291,12 +342,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -443,7 +496,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -464,6 +517,51 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -474,7 +572,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -497,6 +595,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) diff --git a/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out b/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out index 6d087b281b..74d49385ab 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out @@ -400,6 +400,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -407,13 +409,15 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 8 Map Operator Tree: TableScan alias: t1 @@ -434,7 +438,7 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Map 7 + Map 9 Map Operator Tree: TableScan alias: t1 @@ -490,9 +494,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials @@ -504,7 +535,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 5 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -525,6 +556,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -539,6 +597,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out index 7bb31111bb..2afea6c16f 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out @@ -46,10 +46,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -104,6 +107,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -159,6 +178,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -192,6 +241,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY @@ -237,12 +294,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -370,6 +429,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -403,6 +508,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY @@ -442,10 +555,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -479,7 +595,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -500,6 +616,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -555,6 +687,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -565,7 +727,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -588,6 +750,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key PREHOOK: type: QUERY @@ -625,10 +795,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -662,7 +835,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -683,6 +856,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -738,6 +927,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -748,7 +967,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -771,6 +990,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k PREHOOK: type: QUERY @@ -816,10 +1043,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -874,6 +1104,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -929,6 +1175,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -962,6 +1238,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key PREHOOK: type: QUERY @@ -1008,12 +1292,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1141,6 +1427,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1174,6 +1506,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val PREHOOK: type: QUERY @@ -1214,12 +1554,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1326,7 +1668,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -1347,6 +1689,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1357,7 +1745,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -1380,6 +1768,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY @@ -1422,12 +1818,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1544,7 +1942,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1565,18 +1963,64 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} + bucket_count -1 column.name.delimiter , columns key,cnt columns.comments @@ -1598,6 +2042,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -1647,10 +2099,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1684,7 +2139,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1705,6 +2160,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1760,113 +2231,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 2 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,val - columns.comments - columns.types string:string + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1877,7 +2271,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1900,6 +2294,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key @@ -1929,15 +2331,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### 1 1 -1 1 -2 1 2 1 3 1 -3 1 -7 1 7 1 8 2 -8 2 PREHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( @@ -1958,12 +2355,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1997,7 +2396,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2005,19 +2404,35 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2073,7 +2488,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 2 + Map 3 Map Operator Tree: TableScan alias: t1 @@ -2153,7 +2568,37 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Reducer 3 + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2181,7 +2626,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2189,19 +2634,35 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Stage: Stage-0 Move Operator @@ -2212,7 +2673,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2220,13 +2681,13 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 @@ -2235,6 +2696,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key @@ -2293,12 +2762,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2385,7 +2856,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 3 + Map 4 Map Operator Tree: TableScan alias: t1 @@ -2495,7 +2966,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2516,6 +2987,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2526,7 +3043,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2549,6 +3066,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -2866,12 +3391,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2909,7 +3436,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -2932,7 +3459,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -2979,7 +3506,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2995,11 +3522,57 @@ STAGE PLANS: serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3010,7 +3583,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3033,6 +3606,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key PREHOOK: type: QUERY @@ -3070,10 +3651,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3107,7 +3691,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3128,6 +3712,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3137,7 +3737,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3160,7 +3760,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3183,6 +3783,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3193,7 +3823,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3216,6 +3846,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val PREHOOK: type: QUERY @@ -3264,10 +3902,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3322,6 +3963,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(key4, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3331,7 +3988,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3354,7 +4011,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3377,6 +4034,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3410,6 +4097,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 PREHOOK: type: QUERY @@ -3457,10 +4152,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3494,7 +4192,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3515,6 +4213,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3524,7 +4238,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3547,7 +4261,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3570,6 +4284,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3580,7 +4324,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3603,6 +4347,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq @@ -3657,10 +4409,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3694,7 +4449,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3715,6 +4470,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3724,7 +4495,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3747,7 +4518,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3770,6 +4541,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3780,7 +4581,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3803,6 +4604,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( @@ -3871,6 +4680,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -3878,10 +4689,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 5 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: t2 @@ -3925,6 +4738,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 6 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -3945,6 +4790,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -3959,6 +4845,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -4032,6 +4932,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -4039,10 +4941,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 5 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: t2 @@ -4089,6 +4993,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 6 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 8.0) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -4109,6 +5048,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -4123,6 +5103,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out index bf573b744c..120d4eaa96 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out @@ -46,10 +46,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -104,6 +107,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -159,6 +178,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -192,6 +241,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY @@ -237,6 +294,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -244,6 +302,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -389,6 +448,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -422,6 +527,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY @@ -461,10 +574,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -498,7 +614,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -519,6 +635,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -574,6 +706,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -584,7 +746,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -607,6 +769,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key PREHOOK: type: QUERY @@ -644,10 +814,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -681,7 +854,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -702,6 +875,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -757,6 +946,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -767,7 +986,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -790,6 +1009,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k PREHOOK: type: QUERY @@ -835,10 +1062,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -893,6 +1123,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -948,6 +1194,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -981,6 +1257,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key PREHOOK: type: QUERY @@ -1027,6 +1311,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1034,6 +1319,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1179,6 +1465,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1212,6 +1544,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val PREHOOK: type: QUERY @@ -1252,6 +1592,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1259,6 +1600,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1383,7 +1725,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -1404,6 +1746,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1414,7 +1802,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -1437,6 +1825,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY @@ -1479,6 +1875,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1486,6 +1883,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1620,7 +2018,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1641,18 +2039,64 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} + bucket_count -1 column.name.delimiter , columns key,cnt columns.comments @@ -1674,6 +2118,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -1723,10 +2175,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1760,7 +2215,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1781,6 +2236,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1836,113 +2307,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 2 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,val - columns.comments - columns.types string:string + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1953,7 +2347,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1976,6 +2370,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key @@ -2005,15 +2407,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### 1 1 -1 1 -2 1 2 1 3 1 -3 1 -7 1 7 1 8 2 -8 2 PREHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( @@ -2034,13 +2431,15 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Reducer 5 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2074,7 +2473,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2082,19 +2481,35 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2150,7 +2565,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 2 + Map 3 Map Operator Tree: TableScan alias: t1 @@ -2230,7 +2645,37 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Reducer 3 + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2248,7 +2693,7 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false - Reducer 4 + Reducer 5 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2276,7 +2721,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2284,19 +2729,35 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Stage: Stage-0 Move Operator @@ -2307,7 +2768,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2315,13 +2776,13 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 @@ -2330,6 +2791,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key @@ -2388,12 +2857,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2480,7 +2951,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 3 + Map 4 Map Operator Tree: TableScan alias: t1 @@ -2590,7 +3061,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2611,6 +3082,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2621,7 +3138,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2644,6 +3161,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -2980,6 +3505,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2987,6 +3513,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3024,7 +3551,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3047,7 +3574,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3112,7 +3639,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3128,11 +3655,57 @@ STAGE PLANS: serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3143,7 +3716,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3166,6 +3739,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key PREHOOK: type: QUERY @@ -3203,10 +3784,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3240,7 +3824,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3261,6 +3845,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3270,7 +3870,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3293,7 +3893,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3316,6 +3916,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3326,7 +3956,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3349,6 +3979,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val PREHOOK: type: QUERY @@ -3397,10 +4035,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3455,6 +4096,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(key4, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3464,7 +4121,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3487,7 +4144,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3510,6 +4167,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3543,6 +4230,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 PREHOOK: type: QUERY @@ -3590,10 +4285,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3627,7 +4325,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3648,6 +4346,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3657,7 +4371,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3680,7 +4394,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3703,6 +4417,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3713,7 +4457,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3736,6 +4480,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq @@ -3790,10 +4542,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3827,7 +4582,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3848,6 +4603,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3857,7 +4628,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3880,7 +4651,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3903,6 +4674,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3913,7 +4714,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3936,6 +4737,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( @@ -4004,6 +4813,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -4011,11 +4822,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 7 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: t2 @@ -4059,6 +4872,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 7 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -4093,6 +4938,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -4107,6 +4993,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -4180,6 +5080,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -4187,11 +5089,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 7 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: t2 @@ -4238,6 +5142,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 7 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 8.0) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -4272,6 +5211,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -4286,6 +5266,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out index 1efb81b35f..35b68914b5 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out @@ -42,7 +42,7 @@ Database: default Table: test_table_bucketed #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 309 rawDataSize 1482 diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out index 356f6254a7..7e5a92ba25 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out @@ -56,10 +56,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -89,6 +92,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -105,6 +143,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, count(*) FROM test_table1 GROUP BY key PREHOOK: type: QUERY @@ -139,7 +184,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 309 rawDataSize 1482 @@ -177,12 +222,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -209,7 +256,7 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(_col1) (type: double) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src @@ -248,6 +295,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -264,6 +346,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT a.key, a.value FROM ( SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key @@ -337,10 +426,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -371,6 +463,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -387,6 +514,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key PREHOOK: type: QUERY @@ -423,7 +557,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1028 rawDataSize 10968 @@ -452,12 +586,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -508,6 +644,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -524,6 +695,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key GROUP BY b.value @@ -562,7 +740,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 309 rawDataSize 2728 diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out index 175ddd6a02..973c8d0c4c 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1028 rawDataSize 10968 @@ -91,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1028 rawDataSize 10968 diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out index 13219ac56d..fb0f6b91db 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out @@ -26,12 +26,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -62,6 +64,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-04-08' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-04-08' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -79,6 +116,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table + PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key2, value, cast(hr as int) FROM (SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr @@ -129,7 +173,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 670 rawDataSize 5888 @@ -169,7 +213,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 330 rawDataSize 2924 diff --git a/ql/src/test/results/clientpositive/spark/innerjoin.q.out b/ql/src/test/results/clientpositive/spark/innerjoin.q.out index 9328b99b0f..4d2f5eca95 100644 --- a/ql/src/test/results/clientpositive/spark/innerjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/innerjoin.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +45,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -83,6 +85,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -97,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 INNER JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/input12.q.out b/ql/src/test/results/clientpositive/spark/input12.q.out index 2efd81b86a..ed1e45c95c 100644 --- a/ql/src/test/results/clientpositive/spark/input12.q.out +++ b/ql/src/test/results/clientpositive/spark/input12.q.out @@ -38,6 +38,9 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 @@ -46,9 +49,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -68,6 +75,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +118,106 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key >= 100) and (key < 200)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key >= 200) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -112,6 +232,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/input13.q.out b/ql/src/test/results/clientpositive/spark/input13.q.out index 09c7959bfb..e25139c02a 100644 --- a/ql/src/test/results/clientpositive/spark/input13.q.out +++ b/ql/src/test/results/clientpositive/spark/input13.q.out @@ -40,6 +40,9 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3 + Stage-9 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3 + Stage-10 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3 Stage-1 depends on stages: Stage-4 Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -49,9 +52,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -71,6 +78,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -115,6 +135,106 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key >= 100) and (key < 200)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key >= 200) and (key < 300)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -129,6 +249,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/input14.q.out b/ql/src/test/results/clientpositive/spark/input14.q.out index 36f162eb90..99fcce42de 100644 --- a/ql/src/test/results/clientpositive/spark/input14.q.out +++ b/ql/src/test/results/clientpositive/spark/input14.q.out @@ -28,12 +28,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -75,6 +77,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value) diff --git a/ql/src/test/results/clientpositive/spark/input17.q.out b/ql/src/test/results/clientpositive/spark/input17.q.out index d95dbcb61a..44fff1c529 100644 --- a/ql/src/test/results/clientpositive/spark/input17.q.out +++ b/ql/src/test/results/clientpositive/spark/input17.q.out @@ -28,12 +28,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -86,6 +115,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM src_thrift SELECT TRANSFORM(src_thrift.aint + src_thrift.lint[0], src_thrift.lintstring[0]) diff --git a/ql/src/test/results/clientpositive/spark/input18.q.out b/ql/src/test/results/clientpositive/spark/input18.q.out index 65850b2ef5..13986cb5c8 100644 --- a/ql/src/test/results/clientpositive/spark/input18.q.out +++ b/ql/src/test/results/clientpositive/spark/input18.q.out @@ -28,12 +28,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -75,6 +77,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value, 1+2, 3+4) diff --git a/ql/src/test/results/clientpositive/spark/input1_limit.q.out b/ql/src/test/results/clientpositive/spark/input1_limit.q.out index dd49287a2c..5e3ad714dd 100644 --- a/ql/src/test/results/clientpositive/spark/input1_limit.q.out +++ b/ql/src/test/results/clientpositive/spark/input1_limit.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,11 +37,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 1) - Reducer 3 <- Map 5 (GROUP, 1) + Reducer 2 <- Map 6 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -57,9 +61,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -77,7 +80,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -100,8 +102,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 @@ -121,6 +150,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -135,6 +191,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/input_part2.q.out b/ql/src/test/results/clientpositive/spark/input_part2.q.out index e1582e2875..e26201f908 100644 --- a/ql/src/test/results/clientpositive/spark/input_part2.q.out +++ b/ql/src/test/results/clientpositive/spark/input_part2.q.out @@ -28,15 +28,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -82,6 +87,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-08' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: ((key < 100) and (ds = '2008-04-09')) (type: boolean) @@ -224,6 +245,198 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key < 100) and (ds = '2008-04-09')) (type: boolean) + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string), '12' (type: string), '2008-04-09' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-09' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -257,6 +470,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest2 + Is Table Level Stats: true + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/insert_into1.q.out b/ql/src/test/results/clientpositive/spark/insert_into1.q.out index dff389db9c..b05e218151 100644 --- a/ql/src/test/results/clientpositive/spark/insert_into1.q.out +++ b/ql/src/test/results/clientpositive/spark/insert_into1.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,7 +41,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -62,6 +63,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -76,6 +104,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -132,12 +167,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -153,7 +190,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -176,6 +212,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -190,6 +253,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -246,12 +316,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -267,7 +339,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -290,6 +361,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -304,6 +402,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -360,10 +465,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -384,6 +492,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 1 (type: int), 'a' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -398,6 +533,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: insert overwrite table insert_into1 select 1, 'a' PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -416,10 +558,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -440,6 +585,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 2 (type: int), 'b' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -454,6 +626,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: insert into insert_into1 select 2, 'b' PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table diff --git a/ql/src/test/results/clientpositive/spark/insert_into2.q.out b/ql/src/test/results/clientpositive/spark/insert_into2.q.out index 329387dd91..f211ae46f9 100644 --- a/ql/src/test/results/clientpositive/spark/insert_into2.q.out +++ b/ql/src/test/results/clientpositive/spark/insert_into2.q.out @@ -22,12 +22,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +45,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -66,6 +67,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -82,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src order by key limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -177,12 +220,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -198,7 +243,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -221,6 +265,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -237,6 +316,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 100 PREHOOK: type: QUERY @@ -301,12 +387,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -322,7 +410,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -345,6 +432,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -361,6 +483,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 50 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/insert_into3.q.out b/ql/src/test/results/clientpositive/spark/insert_into3.q.out index a6fac2336b..67039ec592 100644 --- a/ql/src/test/results/clientpositive/spark/insert_into3.q.out +++ b/ql/src/test/results/clientpositive/spark/insert_into3.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -39,11 +41,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (SORT, 1) - Reducer 3 <- Map 5 (SORT, 1) + Reducer 2 <- Map 6 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -56,21 +60,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reducer 2 Reduce Operator Tree: Select Operator @@ -92,8 +81,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 @@ -113,6 +129,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -127,6 +170,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + Stage: Stage-1 Move Operator tables: @@ -192,6 +249,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -199,11 +258,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 1) - Reducer 3 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 6 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -218,7 +279,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -241,8 +301,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 @@ -262,6 +349,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -276,6 +390,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/join1.q.out b/ql/src/test/results/clientpositive/spark/join1.q.out index a0ee4ea5b8..6f67f692b2 100644 --- a/ql/src/test/results/clientpositive/spark/join1.q.out +++ b/ql/src/test/results/clientpositive/spark/join1.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +45,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -83,6 +85,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -97,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join14.q.out b/ql/src/test/results/clientpositive/spark/join14.q.out index e804a1d9a5..7fda67aabc 100644 --- a/ql/src/test/results/clientpositive/spark/join14.q.out +++ b/ql/src/test/results/clientpositive/spark/join14.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +45,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -83,6 +85,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -97,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join17.q.out b/ql/src/test/results/clientpositive/spark/join17.q.out index aa4736c860..68820f7a81 100644 --- a/ql/src/test/results/clientpositive/spark/join17.q.out +++ b/ql/src/test/results/clientpositive/spark/join17.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -100,7 +102,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src1] - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -221,6 +223,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -254,6 +302,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join25.q.out b/ql/src/test/results/clientpositive/spark/join25.q.out index 05e5e701f9..9928fa25f4 100644 --- a/ql/src/test/results/clientpositive/spark/join25.q.out +++ b/ql/src/test/results/clientpositive/spark/join25.q.out @@ -21,6 +21,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -48,6 +49,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -84,8 +87,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -100,6 +130,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join26.q.out b/ql/src/test/results/clientpositive/spark/join26.q.out index 9f85a3e995..34ced4fb7e 100644 --- a/ql/src/test/results/clientpositive/spark/join26.q.out +++ b/ql/src/test/results/clientpositive/spark/join26.q.out @@ -23,13 +23,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: x @@ -102,7 +103,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 3 + Map 4 Map Operator Tree: TableScan alias: z @@ -180,6 +181,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -206,8 +209,8 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col1, _col2, _col4 input vertices: - 1 Map 2 - 2 Map 3 + 1 Map 3 + 2 Map 4 Position of Big Table: 0 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -246,6 +249,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -299,6 +318,36 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -332,6 +381,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join27.q.out b/ql/src/test/results/clientpositive/spark/join27.q.out index e10d2fb04a..385a64d3e9 100644 --- a/ql/src/test/results/clientpositive/spark/join27.q.out +++ b/ql/src/test/results/clientpositive/spark/join27.q.out @@ -21,6 +21,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -48,6 +49,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -84,8 +87,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -100,6 +130,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) diff --git a/ql/src/test/results/clientpositive/spark/join28.q.out b/ql/src/test/results/clientpositive/spark/join28.q.out index 8d4d870f89..189760a498 100644 --- a/ql/src/test/results/clientpositive/spark/join28.q.out +++ b/ql/src/test/results/clientpositive/spark/join28.q.out @@ -27,6 +27,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -51,7 +52,7 @@ STAGE PLANS: 1 _col0 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -72,6 +73,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -94,7 +97,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) @@ -122,8 +125,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -138,6 +168,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq.key1, z.value FROM diff --git a/ql/src/test/results/clientpositive/spark/join29.q.out b/ql/src/test/results/clientpositive/spark/join29.q.out index 573628fe9b..62ba549230 100644 --- a/ql/src/test/results/clientpositive/spark/join29.q.out +++ b/ql/src/test/results/clientpositive/spark/join29.q.out @@ -23,6 +23,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -74,6 +75,7 @@ STAGE PLANS: Spark Edges: Reducer 4 <- Map 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -132,6 +134,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt1, 16), compute_stats(cnt2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -146,6 +175,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt1, cnt2 + Column Types: string, int, int + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, subq1.cnt, subq2.cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN diff --git a/ql/src/test/results/clientpositive/spark/join3.q.out b/ql/src/test/results/clientpositive/spark/join3.q.out index e50f091277..056aa0f339 100644 --- a/ql/src/test/results/clientpositive/spark/join3.q.out +++ b/ql/src/test/results/clientpositive/spark/join3.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +45,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -60,7 +62,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 5 Map Operator Tree: TableScan alias: src3 @@ -102,6 +104,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -116,6 +145,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join30.q.out b/ql/src/test/results/clientpositive/spark/join30.q.out index 3584f8cffc..f673037eb9 100644 --- a/ql/src/test/results/clientpositive/spark/join30.q.out +++ b/ql/src/test/results/clientpositive/spark/join30.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -48,6 +49,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -106,6 +108,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +149,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join31.q.out b/ql/src/test/results/clientpositive/spark/join31.q.out index 3fee7b8f62..363a624f16 100644 --- a/ql/src/test/results/clientpositive/spark/join31.q.out +++ b/ql/src/test/results/clientpositive/spark/join31.q.out @@ -25,13 +25,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 5 Map Operator Tree: TableScan alias: y @@ -60,6 +61,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -97,7 +99,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) @@ -131,6 +133,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -145,6 +174,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN diff --git a/ql/src/test/results/clientpositive/spark/join32.q.out b/ql/src/test/results/clientpositive/spark/join32.q.out index 2f50aef7ae..de997b7651 100644 --- a/ql/src/test/results/clientpositive/spark/join32.q.out +++ b/ql/src/test/results/clientpositive/spark/join32.q.out @@ -23,13 +23,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: x @@ -101,7 +102,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -176,6 +177,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -200,7 +203,7 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -211,7 +214,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3, _col6 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -250,6 +253,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -305,6 +324,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -338,6 +387,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index f9e81aa47a..94e7df1698 100644 --- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -31,13 +31,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: x @@ -109,7 +110,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -184,6 +185,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -208,7 +211,7 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -219,7 +222,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3, _col6 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -258,6 +261,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -313,6 +332,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -346,6 +395,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -482,6 +539,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-4 @@ -648,7 +706,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 4 + Map 5 Map Operator Tree: TableScan alias: y @@ -723,6 +781,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -758,7 +818,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3, _col6 input vertices: - 1 Map 4 + 1 Map 5 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -776,7 +836,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -797,6 +857,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -850,6 +926,36 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [w] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -860,7 +966,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -883,6 +989,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src w JOIN src1 x ON (x.value = w.value) @@ -1014,6 +1128,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1094,7 +1209,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -1169,6 +1284,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -1193,7 +1310,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1247,6 +1364,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1300,6 +1433,36 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1333,6 +1496,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, z.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res @@ -1466,6 +1637,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1617,6 +1789,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -1674,7 +1848,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -1695,6 +1869,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1748,6 +1938,36 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1758,7 +1978,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -1781,6 +2001,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, z.value, res.value FROM (select x.key, x.value from src1 x LEFT OUTER JOIN src y ON (x.key = y.key)) res @@ -1926,6 +2154,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1950,7 +2179,7 @@ STAGE PLANS: 1 _col1 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -1971,6 +2200,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -1993,7 +2224,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -2021,8 +2252,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -2037,6 +2295,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, x.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res @@ -2170,6 +2435,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -2194,7 +2460,7 @@ STAGE PLANS: 1 _col1 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -2215,6 +2481,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -2237,7 +2505,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -2265,8 +2533,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -2281,6 +2576,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, y.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out index 2f50aef7ae..de997b7651 100644 --- a/ql/src/test/results/clientpositive/spark/join33.q.out +++ b/ql/src/test/results/clientpositive/spark/join33.q.out @@ -23,13 +23,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: x @@ -101,7 +102,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -176,6 +177,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -200,7 +203,7 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -211,7 +214,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3, _col6 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -250,6 +253,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -305,6 +324,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -338,6 +387,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join34.q.out b/ql/src/test/results/clientpositive/spark/join34.q.out index e32f3e4d11..6a7be7d04f 100644 --- a/ql/src/test/results/clientpositive/spark/join34.q.out +++ b/ql/src/test/results/clientpositive/spark/join34.q.out @@ -30,12 +30,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -112,7 +114,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [x] - Map 3 + Map 4 Map Operator Tree: TableScan alias: x1 @@ -186,7 +188,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [x1] - Map 4 + Map 5 Map Operator Tree: TableScan alias: x @@ -307,6 +309,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -340,6 +388,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.value FROM diff --git a/ql/src/test/results/clientpositive/spark/join35.q.out b/ql/src/test/results/clientpositive/spark/join35.q.out index e1a96f66a0..405db547b7 100644 --- a/ql/src/test/results/clientpositive/spark/join35.q.out +++ b/ql/src/test/results/clientpositive/spark/join35.q.out @@ -30,14 +30,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 4 (GROUP, 2) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -120,7 +122,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [x] - Map 4 + Map 5 Map Operator Tree: TableScan alias: x1 @@ -200,7 +202,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [x1] - Map 6 + Map 7 Map Operator Tree: TableScan alias: x @@ -339,7 +341,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 6 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -390,6 +438,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, int + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.cnt FROM diff --git a/ql/src/test/results/clientpositive/spark/join36.q.out b/ql/src/test/results/clientpositive/spark/join36.q.out index b1717e02e3..7fd1810923 100644 --- a/ql/src/test/results/clientpositive/spark/join36.q.out +++ b/ql/src/test/results/clientpositive/spark/join36.q.out @@ -61,13 +61,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: y @@ -88,6 +89,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -110,7 +113,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) @@ -124,8 +127,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -140,6 +170,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, int, int + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join37.q.out b/ql/src/test/results/clientpositive/spark/join37.q.out index 327e93ecd2..535a14d58f 100644 --- a/ql/src/test/results/clientpositive/spark/join37.q.out +++ b/ql/src/test/results/clientpositive/spark/join37.q.out @@ -21,6 +21,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -48,6 +49,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -84,8 +87,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -100,6 +130,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join39.q.out b/ql/src/test/results/clientpositive/spark/join39.q.out index 8f0ba62f2b..039feca2b8 100644 --- a/ql/src/test/results/clientpositive/spark/join39.q.out +++ b/ql/src/test/results/clientpositive/spark/join39.q.out @@ -21,13 +21,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src @@ -48,6 +49,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -67,7 +70,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -77,8 +80,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, key1, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(key1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -93,6 +123,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, key1, val2 + Column Types: string, string, string, string + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join4.q.out b/ql/src/test/results/clientpositive/spark/join4.q.out index 8ccc907b6c..5ad0577c3e 100644 --- a/ql/src/test/results/clientpositive/spark/join4.q.out +++ b/ql/src/test/results/clientpositive/spark/join4.q.out @@ -40,12 +40,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +68,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -106,6 +108,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +149,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/join5.q.out b/ql/src/test/results/clientpositive/spark/join5.q.out index 2ab020ed18..2334161010 100644 --- a/ql/src/test/results/clientpositive/spark/join5.q.out +++ b/ql/src/test/results/clientpositive/spark/join5.q.out @@ -40,12 +40,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +68,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -106,6 +108,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +149,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/join6.q.out b/ql/src/test/results/clientpositive/spark/join6.q.out index 8ae5e3af59..89da894cba 100644 --- a/ql/src/test/results/clientpositive/spark/join6.q.out +++ b/ql/src/test/results/clientpositive/spark/join6.q.out @@ -40,12 +40,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +68,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -106,6 +108,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +149,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/join7.q.out b/ql/src/test/results/clientpositive/spark/join7.q.out index 45c42319c3..6bf83a04d9 100644 --- a/ql/src/test/results/clientpositive/spark/join7.q.out +++ b/ql/src/test/results/clientpositive/spark/join7.q.out @@ -50,12 +50,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -76,7 +78,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -94,7 +96,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 4 + Map 5 Map Operator Tree: TableScan alias: src3 @@ -136,6 +138,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -150,6 +179,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/join8.q.out b/ql/src/test/results/clientpositive/spark/join8.q.out index f8faaa7de7..b1f66e7add 100644 --- a/ql/src/test/results/clientpositive/spark/join8.q.out +++ b/ql/src/test/results/clientpositive/spark/join8.q.out @@ -40,12 +40,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +68,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -109,6 +111,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -123,6 +152,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/join9.q.out b/ql/src/test/results/clientpositive/spark/join9.q.out index 373ac8b984..04eb62a6c7 100644 --- a/ql/src/test/results/clientpositive/spark/join9.q.out +++ b/ql/src/test/results/clientpositive/spark/join9.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -101,7 +103,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [src1] - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -222,6 +224,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -255,6 +303,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out b/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out index 86f3d9a8c4..f73fbecd59 100644 --- a/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -172,6 +173,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -234,6 +237,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -289,6 +308,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -322,6 +371,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -517,6 +574,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -550,7 +608,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -571,7 +629,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -619,7 +677,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -640,7 +698,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -664,6 +722,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -705,7 +765,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -726,6 +786,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -781,6 +857,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -791,7 +897,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -814,6 +920,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value FROM src1_copy x JOIN src_copy y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out b/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out index cdd5c661d7..be0c4c2ee2 100644 --- a/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out +++ b/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out @@ -26,10 +26,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +75,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -123,6 +145,41 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -156,6 +213,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from src PREHOOK: type: QUERY @@ -198,7 +263,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 diff --git a/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out b/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out index 08df5d3772..bc2e3360a9 100644 --- a/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out +++ b/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out @@ -26,10 +26,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +75,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -174,6 +196,41 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -207,6 +264,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -253,7 +318,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -314,7 +379,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out index 5bf7f2884e..20eaf3542d 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out @@ -56,15 +56,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 2) + Reducer 3 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -84,6 +89,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -99,6 +120,72 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ds > '2008-04-08') (type: boolean) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -116,6 +203,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out index 5ec6d68ea3..4a46824ece 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out @@ -46,10 +46,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -69,6 +72,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part10 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-12-31' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -86,6 +124,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part10 + PREHOOK: query: from srcpart insert overwrite table nzhang_part10 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out index 7f403d575d..9f37175a57 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out @@ -60,10 +60,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2), Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -86,7 +89,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 - Map 2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Map 3 Map Operator Tree: TableScan alias: src @@ -106,6 +125,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -123,6 +177,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part13 + PREHOOK: query: insert overwrite table nzhang_part13 partition (ds="2010-03-03", hr) select * from ( select key, value, '22' diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out index ce65e718d9..5315b1d9b2 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out @@ -47,14 +47,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) - Reducer 6 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 2), Reducer 5 (GROUP, 2), Reducer 7 (GROUP, 2) + Reducer 5 <- Map 1 (GROUP, 1) + Reducer 7 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -70,7 +72,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reducer 2 Reduce Operator Tree: Limit @@ -88,7 +89,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Limit Number of rows: 2 @@ -105,7 +141,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Limit Number of rows: 2 @@ -122,6 +174,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Stage: Stage-0 Move Operator @@ -138,6 +206,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.nzhang_part14 + PREHOOK: query: insert overwrite table nzhang_part14 partition(value) select key, value from ( select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out index d066b3ae11..724feeeab7 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out @@ -38,12 +38,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 10) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -74,6 +76,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +128,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part_bucket + PREHOOK: query: insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr) select key, value, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out index d120963697..67b70ed5ec 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out @@ -44,10 +44,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -67,6 +70,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -84,6 +122,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part3 + PREHOOK: query: insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out index 7ec76b5c77..03a2b1d318 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out @@ -54,10 +54,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -77,6 +80,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +132,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part4 + PREHOOK: query: insert overwrite table nzhang_part4 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out index b132a591d4..e8713a2e92 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out @@ -31,10 +31,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -54,6 +57,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -70,6 +108,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.nzhang_part5 + PREHOOK: query: insert overwrite table nzhang_part5 partition (value) select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out index e19a986952..be9eb0cfee 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out @@ -48,15 +48,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 2) + Reducer 3 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -99,6 +104,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (ds > '2008-04-08') (type: boolean) @@ -337,6 +361,309 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (ds > '2008-04-08') (type: boolean) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=11 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -370,6 +697,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out index 55bcfae4fc..5cf30346b2 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out @@ -46,10 +46,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -69,6 +72,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -86,6 +124,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part9 + PREHOOK: query: from srcpart insert overwrite table nzhang_part9 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/mapreduce1.q.out b/ql/src/test/results/clientpositive/spark/mapreduce1.q.out index d75b482871..eafc1e536b 100644 --- a/ql/src/test/results/clientpositive/spark/mapreduce1.q.out +++ b/ql/src/test/results/clientpositive/spark/mapreduce1.q.out @@ -26,12 +26,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -70,6 +72,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -84,6 +113,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/spark/mapreduce2.q.out b/ql/src/test/results/clientpositive/spark/mapreduce2.q.out index adfb503aa7..ea93af28d5 100644 --- a/ql/src/test/results/clientpositive/spark/mapreduce2.q.out +++ b/ql/src/test/results/clientpositive/spark/mapreduce2.q.out @@ -24,12 +24,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -67,6 +69,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -81,6 +110,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/spark/merge1.q.out b/ql/src/test/results/clientpositive/spark/merge1.q.out index d5b1e9f9d7..015fd800d6 100644 --- a/ql/src/test/results/clientpositive/spark/merge1.q.out +++ b/ql/src/test/results/clientpositive/spark/merge1.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -29,6 +30,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +124,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.dest1 + Stage: Stage-3 Spark #### A masked pattern was here #### @@ -516,6 +552,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -523,6 +560,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -542,6 +581,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -565,6 +631,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + Stage: Stage-3 Spark #### A masked pattern was here #### @@ -626,6 +699,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -633,6 +707,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -652,6 +728,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -675,6 +778,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/merge2.q.out b/ql/src/test/results/clientpositive/spark/merge2.q.out index d780dc2cfa..acb5c0d60d 100644 --- a/ql/src/test/results/clientpositive/spark/merge2.q.out +++ b/ql/src/test/results/clientpositive/spark/merge2.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -29,6 +30,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +124,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.test1 + Stage: Stage-3 Spark #### A masked pattern was here #### @@ -516,6 +552,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -523,6 +560,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -542,6 +581,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -565,6 +631,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + Stage: Stage-3 Spark #### A masked pattern was here #### @@ -626,6 +699,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -633,6 +707,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -652,6 +728,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -675,6 +778,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out index 79fc6ae3e7..ae89612aac 100644 --- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out @@ -183,52 +183,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -239,52 +199,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -295,56 +215,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -355,56 +231,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/spark/multi_insert.q.out b/ql/src/test/results/clientpositive/spark/multi_insert.q.out index 33af962c97..477b6dbea2 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert.q.out @@ -28,15 +28,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -56,6 +61,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -71,6 +89,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -85,6 +156,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -165,15 +250,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -193,6 +283,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -208,6 +311,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -222,6 +378,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -302,15 +472,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -330,6 +505,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -345,6 +533,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -359,6 +600,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -439,15 +694,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -467,6 +727,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -482,6 +755,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -496,6 +822,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -576,6 +916,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -583,7 +925,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -599,7 +944,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -619,6 +992,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -635,6 +1021,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -649,6 +1060,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -722,6 +1147,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -729,7 +1156,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -745,7 +1175,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -765,6 +1223,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -781,6 +1252,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -795,6 +1291,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -868,6 +1378,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -875,7 +1387,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -891,7 +1406,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -911,6 +1454,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -927,6 +1483,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -941,6 +1522,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1014,6 +1609,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -1021,7 +1618,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1037,7 +1637,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -1057,6 +1685,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1073,6 +1714,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -1087,6 +1753,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1160,15 +1840,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1188,6 +1873,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1199,7 +1897,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Map 2 + Map 6 Map Operator Tree: TableScan alias: src @@ -1209,27 +1907,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < 10) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1244,6 +1964,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1343,15 +2077,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1371,6 +2110,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1382,7 +2134,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Map 2 + Map 6 Map Operator Tree: TableScan alias: src @@ -1392,27 +2144,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < 10) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1427,6 +2201,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1526,15 +2314,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1554,6 +2347,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1565,7 +2371,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Map 2 + Map 6 Map Operator Tree: TableScan alias: src @@ -1575,27 +2381,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < 10) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1610,6 +2438,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1709,15 +2551,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1737,6 +2584,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1748,7 +2608,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Map 2 + Map 6 Map Operator Tree: TableScan alias: src @@ -1758,27 +2618,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < 10) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1793,6 +2675,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out index d8c4b7fc6f..0e2f15f72e 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out @@ -30,6 +30,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -37,7 +39,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -53,7 +58,35 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE @@ -78,6 +111,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -99,6 +145,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 > 500) (type: boolean) + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -113,6 +189,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + Stage: Stage-1 Move Operator tables: @@ -217,6 +307,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-0 depends on stages: Stage-2 Stage-4 depends on stages: Stage-0 @@ -224,7 +316,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -241,7 +336,35 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -263,6 +386,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col0 > 450) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -284,6 +420,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 > 450) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-1 Move Operator @@ -298,6 +464,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + Stage: Stage-0 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out index 81b882aeb4..1654ad74f1 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -88,6 +90,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: count + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(count, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator aggregations: percentile_approx(VALUE._col0, 0.5) mode: complete @@ -101,6 +123,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: double) + outputColumnNames: percentile + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(percentile, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -115,6 +157,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: count + Column Types: int + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: percentile + Column Types: double + Table: default.e2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out index 92d10f43ec..ac6c8d3afb 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -48,7 +50,10 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Reducer 7 (GROUP, 1) + Reducer 6 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -77,7 +82,35 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Reducer 3 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -99,6 +132,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -117,6 +163,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 7 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Stage: Stage-0 Move Operator @@ -131,6 +204,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 + Stage: Stage-1 Move Operator tables: @@ -162,6 +249,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -170,7 +259,10 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Reducer 7 (GROUP, 1) + Reducer 6 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -199,7 +291,35 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Reducer 3 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -221,6 +341,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -239,6 +372,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 7 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Stage: Stage-0 Move Operator @@ -253,6 +413,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 + Stage: Stage-1 Move Operator tables: @@ -1598,6 +1772,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -1605,11 +1781,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -1629,7 +1807,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -1669,9 +1847,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -1689,6 +1894,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1703,6 +1935,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e3 + Stage: Stage-1 Move Operator tables: @@ -1738,6 +1984,9 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 @@ -1747,8 +1996,13 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 10 <- Reducer 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 9 (GROUP, 1) + Reducer 5 <- Reducer 10 (GROUP, 1) + Reducer 6 <- Reducer 8 (GROUP, 1) + Reducer 7 <- Map 1 (SORT, 1) + Reducer 8 <- Map 1 (SORT, 1) + Reducer 9 <- Reducer 7 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1766,7 +2020,76 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: string) - Reducer 2 + Reducer 10 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) @@ -1796,7 +2119,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Reducer 3 + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 9 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1818,6 +2170,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -1850,6 +2215,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out index 7b804daba3..9e975a776c 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out @@ -48,15 +48,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src_10 @@ -82,6 +87,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 @@ -104,6 +122,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Lateral View Forward Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -147,6 +178,92 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Map 5 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col0, _col5 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator + expressions: array((key + 3),(key + 4)) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col5 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -161,6 +278,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + Stage: Stage-1 Move Operator tables: @@ -262,6 +393,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -269,11 +402,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 2) - Reducer 3 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 6 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src_10 @@ -321,7 +456,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Map 5 + Map 7 Map Operator Tree: TableScan alias: src_10 @@ -389,9 +524,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -409,6 +571,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -423,6 +612,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + Stage: Stage-1 Move Operator tables: @@ -506,6 +709,9 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 @@ -515,11 +721,15 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 10 <- Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 7 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Reducer 9 (GROUP, 1) + Reducer 6 <- Reducer 10 (GROUP, 1) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 7 Map Operator Tree: TableScan alias: src_10 @@ -567,7 +777,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Map 5 + Map 8 Map Operator Tree: TableScan alias: src_10 @@ -581,6 +791,36 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + Reducer 10 + Reduce Operator Tree: + Forward + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -601,8 +841,63 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: Forward Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -626,6 +921,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE @@ -661,6 +969,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 + Stage: Stage-1 Move Operator tables: @@ -771,6 +1100,9 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 @@ -780,12 +1112,35 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT key) + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Map 8 Map Operator Tree: TableScan alias: src_10 @@ -831,7 +1186,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 9 Map Operator Tree: TableScan alias: src_10 @@ -877,26 +1232,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan - alias: src_10 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT key) - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -917,9 +1252,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: double) mode: mergepartial @@ -937,7 +1299,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) @@ -957,6 +1346,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -971,6 +1387,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 + Stage: Stage-1 Move Operator tables: @@ -1121,6 +1558,10 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8 + Stage-10 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8 + Stage-11 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8 + Stage-12 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8 Stage-1 depends on stages: Stage-4 Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -1132,12 +1573,17 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 2) + Reducer 13 <- Map 11 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 7 <- Reducer 12 (GROUP, 1) + Reducer 8 <- Reducer 13 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 Map Operator Tree: TableScan alias: src_10 @@ -1163,7 +1609,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: array((key + 1),(key + 2)) (type: array) + expressions: array((key + 3),(key + 4)) (type: array) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator @@ -1183,7 +1629,20 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 11 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 200) or (key > 200)) (type: boolean) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string), key (type: string) + sort order: ++ + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Map 9 Map Operator Tree: TableScan alias: src_10 @@ -1209,7 +1668,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: array((key + 3),(key + 4)) (type: array) + expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator @@ -1229,60 +1688,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan - alias: src_10 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key < 200) or (key > 200)) (type: boolean) - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string), key (type: string) - sort order: ++ - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToString(_col1) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 - Reducer 3 - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToString(_col1) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 - Reducer 4 + Reducer 12 Reduce Operator Tree: Forward Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE @@ -1307,6 +1713,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col1:0._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE @@ -1328,6 +1747,158 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv4 + Reducer 13 + Reduce Operator Tree: + Forward + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col1:0._col0 < 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1342,6 +1913,34 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv4 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out index 2b28d5313e..37911c567b 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out @@ -38,6 +38,9 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 @@ -47,13 +50,14 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 6 (GROUP, 2) + Reducer 2 <- Map 7 (GROUP, 2) Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Map 7 (GROUP, 2) + Reducer 4 <- Map 8 (GROUP, 2) Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 6 <- Map 9 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 7 Map Operator Tree: TableScan alias: src @@ -89,7 +93,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi3 - Map 7 + Map 8 Map Operator Tree: TableScan alias: src @@ -110,6 +114,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Map 9 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -137,6 +166,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Group By Operator @@ -164,6 +213,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -178,6 +261,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out index 7f1d67b566..ff53788557 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out @@ -29,15 +29,20 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -57,6 +62,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -72,6 +90,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -89,6 +160,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -170,15 +255,20 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -198,6 +288,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -213,6 +316,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -230,6 +386,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -311,15 +481,20 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -339,6 +514,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -354,6 +542,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -371,6 +612,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -452,15 +707,20 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -480,6 +740,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -495,6 +768,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -512,6 +838,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -593,6 +933,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -600,7 +942,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -616,7 +961,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -636,6 +1009,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -652,6 +1038,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-3 Dependency Collection @@ -669,6 +1080,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -743,6 +1168,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -750,7 +1177,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -766,7 +1196,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -786,6 +1244,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -802,6 +1273,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-3 Dependency Collection @@ -819,6 +1315,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -893,6 +1403,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -900,7 +1412,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -916,7 +1431,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -936,6 +1479,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -952,6 +1508,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-3 Dependency Collection @@ -969,6 +1550,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1043,6 +1638,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1050,7 +1647,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1066,7 +1666,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -1086,6 +1714,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1102,6 +1743,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-3 Dependency Collection @@ -1119,6 +1785,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1193,15 +1873,20 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1221,6 +1906,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1232,7 +1930,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Map 2 + Map 6 Map Operator Tree: TableScan alias: src @@ -1242,27 +1940,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < 10) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1280,6 +2000,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1380,46 +2114,20 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 < 10) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Filter Operator - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - Map 2 + Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan alias: src @@ -1439,6 +2147,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1450,6 +2171,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1467,6 +2241,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1567,15 +2355,20 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1595,6 +2388,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1606,7 +2412,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Map 2 + Map 6 Map Operator Tree: TableScan alias: src @@ -1616,27 +2422,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < 10) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1654,6 +2482,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1754,15 +2596,20 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1782,6 +2629,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1793,7 +2653,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Map 2 + Map 6 Map Operator Tree: TableScan alias: src @@ -1803,27 +2663,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < 10) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1841,6 +2723,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -2784,6 +3680,8 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3 + Stage-9 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3 Stage-1 depends on stages: Stage-5 Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -2793,11 +3691,13 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP, 1) + Reducer 3 <- Map 7 (GROUP, 1) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: src @@ -2817,6 +3717,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -2832,6 +3745,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 8 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2842,6 +3785,34 @@ STAGE PLANS: Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2873,7 +3844,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -2903,6 +3874,20 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -3004,6 +3989,8 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3 + Stage-9 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3 Stage-1 depends on stages: Stage-5 Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -3013,11 +4000,13 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP, 1) + Reducer 3 <- Map 7 (GROUP, 1) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: src @@ -3037,6 +4026,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3052,6 +4054,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 8 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3062,6 +4094,34 @@ STAGE PLANS: Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -3093,7 +4153,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -3123,6 +4183,20 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -3224,6 +4298,8 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3 + Stage-9 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3 Stage-1 depends on stages: Stage-5 Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -3233,11 +4309,13 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP, 1) + Reducer 3 <- Map 7 (GROUP, 1) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: src @@ -3257,6 +4335,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3272,6 +4363,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 8 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3282,6 +4403,34 @@ STAGE PLANS: Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -3313,7 +4462,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -3343,6 +4492,20 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -3444,6 +4607,8 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3 + Stage-9 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3 Stage-1 depends on stages: Stage-5 Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -3453,11 +4618,13 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP, 1) + Reducer 3 <- Map 7 (GROUP, 1) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: src @@ -3477,6 +4644,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3492,6 +4672,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 8 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3502,6 +4712,34 @@ STAGE PLANS: Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -3533,7 +4771,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -3563,6 +4801,20 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out b/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out index a146a8e83b..765c103e02 100644 --- a/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out +++ b/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out @@ -52,6 +52,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -59,11 +61,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 2) - Reducer 3 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 6 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: tbl @@ -84,7 +88,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 + Map 7 Map Operator Tree: TableScan alias: tbl @@ -125,9 +129,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial @@ -145,6 +176,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -159,6 +217,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -186,6 +258,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -193,11 +267,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 2) - Reducer 3 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 6 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: tbl @@ -218,7 +294,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 + Map 7 Map Operator Tree: TableScan alias: tbl @@ -259,9 +335,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial @@ -279,6 +382,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -293,6 +423,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -320,6 +464,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -327,11 +473,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 2) - Reducer 3 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 6 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: tbl @@ -352,7 +500,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) - Map 5 + Map 7 Map Operator Tree: TableScan alias: tbl @@ -393,9 +541,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial @@ -413,6 +588,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -427,6 +629,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -454,6 +670,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -461,7 +679,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -479,7 +700,35 @@ STAGE PLANS: Map-reduce partition columns: c1 (type: int), c2 (type: int), c3 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: c4 (type: int) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -501,6 +750,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int) @@ -519,6 +781,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Stage: Stage-0 Move Operator @@ -533,6 +822,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest4 + Stage: Stage-1 Move Operator tables: @@ -562,6 +865,9 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 @@ -571,12 +877,36 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 5 (GROUP, 2) - Reducer 3 <- Map 6 (GROUP, 2) - Reducer 4 <- Map 7 (GROUP, 2) + Reducer 2 <- Map 8 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 9 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Map 10 (GROUP, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: int) + outputColumnNames: c1, c2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(c2) + keys: c1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: bigint) + Map 8 Map Operator Tree: TableScan alias: tbl @@ -597,7 +927,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) - Map 6 + Map 9 Map Operator Tree: TableScan alias: tbl @@ -618,27 +948,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col2 (type: bigint) - Map 7 - Map Operator Tree: - TableScan - alias: tbl - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: c1 (type: int), c2 (type: int) - outputColumnNames: c1, c2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count(c2) - keys: c1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -659,9 +968,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial @@ -679,7 +1015,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -699,6 +1062,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -713,6 +1103,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/orc_merge1.q.out b/ql/src/test/results/clientpositive/spark/orc_merge1.q.out index 1407616c01..b69185431f 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge1.q.out @@ -54,10 +54,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -77,6 +80,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +132,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -129,6 +174,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -136,6 +182,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -155,6 +203,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -181,6 +264,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-3 Spark #### A masked pattern was here #### @@ -252,6 +342,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -259,6 +350,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -278,6 +371,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -304,6 +432,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge2.q.out b/ql/src/test/results/clientpositive/spark/orc_merge2.q.out index b7f1a65e22..b4ac6cc15d 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge2.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -37,6 +38,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -56,6 +59,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -83,6 +121,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge2a + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge3.q.out b/ql/src/test/results/clientpositive/spark/orc_merge3.q.out index 81a6013d1e..189173aeda 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge3.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge3.q.out @@ -60,6 +60,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -67,6 +68,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -86,6 +89,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -109,6 +139,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge4.q.out b/ql/src/test/results/clientpositive/spark/orc_merge4.q.out index 8d433b031a..eb2aae474b 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge4.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge4.q.out @@ -78,6 +78,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -85,6 +86,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -104,6 +107,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 94000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -127,6 +157,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge5.q.out b/ql/src/test/results/clientpositive/spark/orc_merge5.q.out index 1f8c869574..6c8c83cde9 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge5.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge5.q.out @@ -30,10 +30,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -57,6 +60,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -71,6 +101,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -115,6 +152,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -122,6 +160,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -145,6 +185,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -168,6 +235,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge6.q.out b/ql/src/test/results/clientpositive/spark/orc_merge6.q.out index be62faed0e..359341a993 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge6.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge6.q.out @@ -30,10 +30,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -57,6 +60,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -74,6 +112,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -160,6 +205,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -167,6 +213,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -190,6 +238,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -216,6 +299,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge7.q.out b/ql/src/test/results/clientpositive/spark/orc_merge7.q.out index 01e3eac54a..d0daebde94 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge7.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge7.q.out @@ -30,10 +30,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -53,6 +56,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +107,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -194,6 +239,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -201,6 +247,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -220,6 +268,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -245,6 +328,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out index 1407616c01..b69185431f 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out @@ -54,10 +54,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -77,6 +80,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +132,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -129,6 +174,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -136,6 +182,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -155,6 +203,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -181,6 +264,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-3 Spark #### A masked pattern was here #### @@ -252,6 +342,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -259,6 +350,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -278,6 +371,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -304,6 +432,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge_incompat1.q.out b/ql/src/test/results/clientpositive/spark/orc_merge_incompat1.q.out index 65790c4962..0f214f61b2 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge_incompat1.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge_incompat1.q.out @@ -30,10 +30,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -56,6 +59,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -70,6 +100,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 diff --git a/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out index 52973c87bb..d861d47815 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out @@ -30,10 +30,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -53,6 +56,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +107,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 diff --git a/ql/src/test/results/clientpositive/spark/parallel.q.out b/ql/src/test/results/clientpositive/spark/parallel.q.out index e31fcf0b5a..a0847e4075 100644 --- a/ql/src/test/results/clientpositive/spark/parallel.q.out +++ b/ql/src/test/results/clientpositive/spark/parallel.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -36,7 +38,10 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Reducer 7 (GROUP, 1) + Reducer 6 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -70,7 +75,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -87,6 +120,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_a + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -100,6 +146,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b + Reducer 7 + Reduce Operator Tree: + Forward + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -114,6 +182,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_a + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_b + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/parallel_join1.q.out b/ql/src/test/results/clientpositive/spark/parallel_join1.q.out index 7fdd48d2ca..4be8e0c3b3 100644 --- a/ql/src/test/results/clientpositive/spark/parallel_join1.q.out +++ b/ql/src/test/results/clientpositive/spark/parallel_join1.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +45,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -83,6 +85,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -97,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out index 74e1845dd2..ca0e4bbc65 100644 --- a/ql/src/test/results/clientpositive/spark/pcr.q.out +++ b/ql/src/test/results/clientpositive/spark/pcr.q.out @@ -96,7 +96,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -143,7 +143,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -291,7 +291,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -338,7 +338,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -385,7 +385,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -571,7 +571,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -618,7 +618,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -768,7 +768,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -815,7 +815,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -967,7 +967,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1014,7 +1014,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1061,7 +1061,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1224,7 +1224,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1271,7 +1271,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1318,7 +1318,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1488,7 +1488,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1535,7 +1535,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1668,7 +1668,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1715,7 +1715,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1888,7 +1888,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1935,7 +1935,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1982,7 +1982,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2182,7 +2182,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2229,7 +2229,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2370,7 +2370,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2445,7 +2445,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2643,7 +2643,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2718,7 +2718,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2925,7 +2925,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2972,7 +2972,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3019,7 +3019,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3066,7 +3066,7 @@ STAGE PLANS: partition values: ds 2000-04-11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3251,7 +3251,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3298,7 +3298,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3345,7 +3345,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3509,15 +3509,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: pcr_t1 @@ -3559,6 +3564,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -3606,7 +3627,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3647,6 +3668,144 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Map 5 + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [pcr_t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3680,6 +3839,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true + Stage: Stage-1 Move Operator tables: @@ -3746,15 +3921,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: pcr_t1 @@ -3779,7 +3959,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3800,6 +3980,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 2 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (key = 3) (type: boolean) @@ -3819,7 +4015,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3851,7 +4047,89 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [pcr_t1] + Map 5 + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = 3) (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 3 (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 3 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3892,6 +4170,66 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3902,7 +4240,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3925,6 +4263,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true + Stage: Stage-1 Move Operator tables: @@ -3934,7 +4288,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/spark/ppd_join5.q.out b/ql/src/test/results/clientpositive/spark/ppd_join5.q.out index 2b05a1c917..d5624ad7f2 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join5.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join5.q.out @@ -32,7 +32,7 @@ POSTHOOK: Lineage: t1.id1 SIMPLE [] POSTHOOK: Lineage: t1.id2 SIMPLE [] POSTHOOK: Lineage: t2.d SIMPLE [] POSTHOOK: Lineage: t2.id SIMPLE [] -Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select a.*,b.d d1,c.d d2 from t1 a join t2 b on (a.id1 = b.id) @@ -51,8 +51,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -68,70 +68,71 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Map 4 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d <= 1) and id is not null) (type: boolean) + predicate: (d <= 1) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: string), d (type: int) - outputColumnNames: _col0, _col1 + expressions: d (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col0 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col0 (type: int) Map 5 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d <= 1) (type: boolean) + predicate: ((d <= 1) and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d (type: int) - outputColumnNames: _col0 + expressions: id (type: string), d (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col0 (type: string) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: int) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col3, _col4 + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -148,7 +149,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from ( select a.*,b.d d1,c.d d2 from @@ -171,8 +172,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -188,73 +189,74 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Map 4 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d <= 1) and id is not null) (type: boolean) + predicate: (d <= 1) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: string), d (type: int) - outputColumnNames: _col0, _col1 + expressions: d (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col0 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col0 (type: int) Map 5 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d <= 1) (type: boolean) + predicate: ((d <= 1) and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d (type: int) - outputColumnNames: _col0 + expressions: id (type: string), d (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col0 (type: string) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: int) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col3, _col4 + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col3 > 1) or (_col4 > 1)) (type: boolean) + predicate: ((_col4 > 1) or (_col2 > 1)) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -271,7 +273,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: select * from ( select a.*,b.d d1,c.d d2 from t1 a join t2 b on (a.id1 = b.id) diff --git a/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out b/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out index 12b1724990..def99b2bb6 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out @@ -48,6 +48,9 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3 + Stage-9 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3 + Stage-10 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3 Stage-1 depends on stages: Stage-4 Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -58,7 +61,12 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 7 (GROUP, 1) + Reducer 4 <- Reducer 8 (GROUP, 1) + Reducer 5 <- Reducer 9 (GROUP, 2) + Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -75,7 +83,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 3 + Map 6 Map Operator Tree: TableScan alias: b @@ -88,14 +96,58 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -113,6 +165,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -157,6 +222,63 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) Stage: Stage-0 Move Operator @@ -171,6 +293,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 + Stage: Stage-1 Move Operator tables: @@ -1321,6 +1464,9 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3 + Stage-9 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3 + Stage-10 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3 Stage-1 depends on stages: Stage-4 Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -1331,7 +1477,12 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 7 (GROUP, 1) + Reducer 4 <- Reducer 8 (GROUP, 1) + Reducer 5 <- Reducer 9 (GROUP, 2) + Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1348,7 +1499,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 3 + Map 6 Map Operator Tree: TableScan alias: b @@ -1361,14 +1512,58 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1386,6 +1581,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -1430,6 +1638,63 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) Stage: Stage-0 Move Operator @@ -1444,6 +1709,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/ptf.q.out b/ql/src/test/results/clientpositive/spark/ptf.q.out index 2e31fbd177..5146bc8a93 100644 --- a/ql/src/test/results/clientpositive/spark/ptf.q.out +++ b/ql/src/test/results/clientpositive/spark/ptf.q.out @@ -2865,6 +2865,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -2872,11 +2874,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2942,8 +2946,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: double) + outputColumnNames: p_mfgr, p_name, p_size, r, dr, s + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(p_mfgr, 16), compute_stats(p_name, 16), compute_stats(p_size, 16), compute_stats(r, 16), compute_stats(dr, 16), compute_stats(s, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2888 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2888 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reducer 4 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 @@ -2978,7 +3009,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: sum_window_0 (type: bigint), _col5 (type: int) - Reducer 5 + Reducer 6 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -3037,7 +3068,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_5 - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: double), _col7 (type: int) + outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(p_mfgr, 16), compute_stats(p_name, 16), compute_stats(p_size, 16), compute_stats(s2, 16), compute_stats(r, 16), compute_stats(dr, 16), compute_stats(cud, 16), compute_stats(fv1, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3864 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3061,7 +3119,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) - Reducer 7 + Reducer 9 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3099,6 +3157,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, r, dr, s + Column Types: string, string, int, int, int, double + Table: default.part_4 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Column Types: string, string, int, int, int, int, double, int + Table: default.part_5 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out b/ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out index 804ff02948..ee4de359e6 100644 --- a/ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out +++ b/ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -136,6 +137,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -170,6 +206,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.bucket5_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket5_1 select * from src cluster by key PREHOOK: type: QUERY @@ -252,12 +296,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -346,6 +392,60 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: string), _col6 (type: string), '2010-03-29' (type: string) + outputColumnNames: aid, bid, t, ctime, etime, l, et, ds + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(aid, 16), compute_stats(bid, 16), compute_stats(t, 16), compute_stats(ctime, 16), compute_stats(etime, 16), compute_stats(l, 16), compute_stats(et, 16) + keys: '2010-03-29' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: '2010-03-29' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '2010-03-29' (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + keys: '2010-03-29' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), '2010-03-29' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types struct:struct:struct:struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -378,3 +478,11 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: aid, bid, t, ctime, etime, l, et + Column Types: string, string, int, string, bigint, string, string + Table: default.complex_tbl_1 + Is Table Level Stats: false + diff --git a/ql/src/test/results/clientpositive/spark/sample1.q.out b/ql/src/test/results/clientpositive/spark/sample1.q.out index ee9eb14685..12de58c23d 100644 --- a/ql/src/test/results/clientpositive/spark/sample1.q.out +++ b/ql/src/test/results/clientpositive/spark/sample1.q.out @@ -20,10 +20,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +75,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, dt, hr + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(dt, 16), compute_stats(hr, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -125,6 +144,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [s] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -158,6 +207,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value, dt, hr + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON rand()) s WHERE s.ds='2008-04-08' and s.hr='11' diff --git a/ql/src/test/results/clientpositive/spark/sample10.q.out b/ql/src/test/results/clientpositive/spark/sample10.q.out index d589216172..40a7392cb3 100644 --- a/ql/src/test/results/clientpositive/spark/sample10.q.out +++ b/ql/src/test/results/clientpositive/spark/sample10.q.out @@ -93,7 +93,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -143,7 +143,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -193,7 +193,7 @@ STAGE PLANS: ds 2008-04-09 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -243,7 +243,7 @@ STAGE PLANS: ds 2008-04-09 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/spark/sample2.q.out b/ql/src/test/results/clientpositive/spark/sample2.q.out index 85266d1bd8..c035b52b4d 100644 --- a/ql/src/test/results/clientpositive/spark/sample2.q.out +++ b/ql/src/test/results/clientpositive/spark/sample2.q.out @@ -18,10 +18,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -71,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -124,6 +143,36 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -157,6 +206,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2) s PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/sample4.q.out b/ql/src/test/results/clientpositive/spark/sample4.q.out index 69e7ee94f8..083989cc0f 100644 --- a/ql/src/test/results/clientpositive/spark/sample4.q.out +++ b/ql/src/test/results/clientpositive/spark/sample4.q.out @@ -18,10 +18,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -71,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -124,6 +143,36 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -157,6 +206,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2 on key) s PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/sample5.q.out b/ql/src/test/results/clientpositive/spark/sample5.q.out index 558b2dbb68..3b1823bb5c 100644 --- a/ql/src/test/results/clientpositive/spark/sample5.q.out +++ b/ql/src/test/results/clientpositive/spark/sample5.q.out @@ -20,10 +20,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +75,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -125,6 +144,36 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket [s] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -158,6 +207,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s diff --git a/ql/src/test/results/clientpositive/spark/sample6.q.out b/ql/src/test/results/clientpositive/spark/sample6.q.out index 4b358291e4..5d3cd89c2e 100644 --- a/ql/src/test/results/clientpositive/spark/sample6.q.out +++ b/ql/src/test/results/clientpositive/spark/sample6.q.out @@ -18,10 +18,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -71,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -124,6 +143,36 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -157,6 +206,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 on key) s PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/sample7.q.out b/ql/src/test/results/clientpositive/spark/sample7.q.out index eae33cad36..a0bdcce274 100644 --- a/ql/src/test/results/clientpositive/spark/sample7.q.out +++ b/ql/src/test/results/clientpositive/spark/sample7.q.out @@ -20,10 +20,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +75,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -125,6 +144,36 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -158,6 +207,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 on key) s WHERE s.key > 100 diff --git a/ql/src/test/results/clientpositive/spark/skewjoin.q.out b/ql/src/test/results/clientpositive/spark/skewjoin.q.out index b0b28c3114..7628a572e9 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoin.q.out @@ -80,17 +80,19 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-4 depends on stages: Stage-1 , consists of Stage-5, Stage-0 - Stage-5 - Stage-3 depends on stages: Stage-5 + Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-3 + Stage-6 + Stage-4 depends on stages: Stage-6 + Stage-3 depends on stages: Stage-4 Stage-0 depends on stages: Stage-3 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -110,7 +112,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -151,15 +153,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-5 Conditional Operator - Stage: Stage-5 + Stage: Stage-6 Spark #### A masked pattern was here #### Vertices: - Map 5 + Map 7 Map Operator Tree: TableScan Spark HashTable Sink Operator @@ -169,11 +186,11 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan Map Join Operator @@ -195,9 +212,52 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work + Stage: Stage-3 + Spark + Edges: + Reducer 3 <- Map 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -211,6 +271,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out index 9424361c56..b50baedc1f 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out @@ -50,12 +50,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -101,7 +103,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -183,6 +185,60 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -216,6 +272,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -1798,7 +1862,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -1871,7 +1935,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out index d0bb917eea..3199333a6b 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out @@ -62,12 +62,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -114,7 +116,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -197,6 +199,60 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1650 Data size: 17529 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1650 Data size: 17529 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 1650 Data size: 17529 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -231,6 +287,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -278,12 +342,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -330,7 +396,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -413,6 +479,60 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3392 Data size: 36194 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3392 Data size: 36194 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '2' (type: string) + Statistics: Num rows: 3392 Data size: 36194 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1696 Data size: 18097 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '2' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1696 Data size: 18097 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1696 Data size: 18097 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -447,6 +567,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out index d8bd846a01..6197f20830 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out @@ -120,7 +120,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -143,7 +143,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -267,7 +267,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -290,7 +290,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -366,7 +366,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -389,7 +389,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out index d652926ae9..59c5071156 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out @@ -82,7 +82,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -105,7 +105,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -160,7 +160,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -183,7 +183,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -388,7 +388,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -411,7 +411,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -466,7 +466,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -489,7 +489,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -642,7 +642,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -665,7 +665,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -720,7 +720,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -743,7 +743,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -896,7 +896,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -919,7 +919,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -974,7 +974,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -997,7 +997,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out index 6ed3c21a99..d5427fa67a 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out @@ -42,6 +42,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -81,6 +82,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -213,12 +221,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -253,6 +263,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -269,6 +314,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238 PREHOOK: type: QUERY @@ -348,6 +400,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -387,6 +440,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out index fb2501597d..29793d58c6 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out @@ -42,6 +42,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -81,6 +82,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out index f35a33d8dc..2bf9c94b4d 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out @@ -42,12 +42,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -79,6 +81,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -95,6 +132,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -163,6 +207,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -202,6 +247,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -282,12 +334,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -319,6 +373,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '2' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2' (type: string) + sort order: + + Map-reduce partition columns: '2' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '2' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -335,3 +424,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 + diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out index 1a9118d378..2964fc36c2 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out @@ -42,6 +42,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -81,6 +82,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -111,12 +119,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -148,6 +158,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -164,6 +209,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -194,12 +246,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -230,6 +284,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -246,6 +335,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -276,12 +372,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -313,6 +411,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -329,6 +462,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -359,12 +499,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -396,6 +538,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -412,6 +589,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -442,12 +626,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -478,6 +664,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -494,3 +715,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out index d8bdef2d25..2cd07fca98 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +78,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 PREHOOK: type: QUERY @@ -183,6 +191,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -220,6 +229,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out index 7cf3cf7e64..69f4587d76 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out @@ -66,12 +66,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -92,7 +94,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -128,6 +130,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -142,6 +171,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results + PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key PREHOOK: type: QUERY @@ -1238,12 +1274,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1264,7 +1302,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1300,6 +1338,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1314,6 +1379,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results + PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key PREHOOK: type: QUERY @@ -2426,12 +2498,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2452,7 +2526,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -2488,6 +2562,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -2502,6 +2603,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results + PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 PREHOOK: type: QUERY @@ -2530,12 +2638,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2556,7 +2666,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -2592,6 +2702,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -2606,6 +2743,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results + PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out index 622b950cef..49ccee7b73 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out @@ -618,10 +618,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -649,6 +652,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -663,6 +693,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results + PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/stats0.q.out b/ql/src/test/results/clientpositive/spark/stats0.q.out index ece5f08fbd..adad782223 100644 --- a/ql/src/test/results/clientpositive/spark/stats0.q.out +++ b/ql/src/test/results/clientpositive/spark/stats0.q.out @@ -18,10 +18,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,6 +69,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -117,6 +136,36 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -150,6 +199,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_non_partitioned + Is Table Level Stats: true + PREHOOK: query: insert overwrite table stats_non_partitioned select * from src PREHOOK: type: QUERY @@ -700,10 +757,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -723,6 +783,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.stats_partitioned + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -739,6 +834,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_partitioned + PREHOOK: query: insert overwrite table stats_partitioned partition (ds='1') select * from src PREHOOK: type: QUERY @@ -1336,10 +1438,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1384,6 +1489,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1435,6 +1556,36 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1468,6 +1619,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_non_partitioned + Is Table Level Stats: true + PREHOOK: query: insert overwrite table stats_non_partitioned select * from src PREHOOK: type: QUERY @@ -2018,10 +2177,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2041,6 +2203,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.stats_partitioned + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -2057,6 +2254,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_partitioned + PREHOOK: query: insert overwrite table stats_partitioned partition (ds='1') select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/stats1.q.out b/ql/src/test/results/clientpositive/spark/stats1.q.out index e691f5105e..94216dba6e 100644 --- a/ql/src/test/results/clientpositive/spark/stats1.q.out +++ b/ql/src/test/results/clientpositive/spark/stats1.q.out @@ -24,12 +24,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 4 (GROUP, 1), Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -48,7 +50,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -65,6 +67,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -84,6 +99,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,6 +140,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable + PREHOOK: query: INSERT OVERWRITE TABLE tmptable SELECT unionsrc.key, unionsrc.value FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 @@ -170,7 +219,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 26 rawDataSize 199 diff --git a/ql/src/test/results/clientpositive/spark/stats10.q.out b/ql/src/test/results/clientpositive/spark/stats10.q.out index 9c682fce76..63159c70ed 100644 --- a/ql/src/test/results/clientpositive/spark/stats10.q.out +++ b/ql/src/test/results/clientpositive/spark/stats10.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -54,6 +56,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket3_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -70,6 +107,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src PREHOOK: type: QUERY @@ -423,7 +467,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -462,7 +506,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/spark/stats14.q.out b/ql/src/test/results/clientpositive/spark/stats14.q.out index 85017462c3..1cd660cd67 100644 --- a/ql/src/test/results/clientpositive/spark/stats14.q.out +++ b/ql/src/test/results/clientpositive/spark/stats14.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/spark/stats15.q.out b/ql/src/test/results/clientpositive/spark/stats15.q.out index 85017462c3..1cd660cd67 100644 --- a/ql/src/test/results/clientpositive/spark/stats15.q.out +++ b/ql/src/test/results/clientpositive/spark/stats15.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/spark/stats18.q.out b/ql/src/test/results/clientpositive/spark/stats18.q.out index 4945808098..de8918a40a 100644 --- a/ql/src/test/results/clientpositive/spark/stats18.q.out +++ b/ql/src/test/results/clientpositive/spark/stats18.q.out @@ -39,7 +39,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out index 359eea3acb..c537da5a8b 100644 --- a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out @@ -73,52 +73,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_null - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -129,52 +89,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_null_part - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/spark/statsfs.q.out b/ql/src/test/results/clientpositive/spark/statsfs.q.out index d070e9aa6f..16da73418f 100644 --- a/ql/src/test/results/clientpositive/spark/statsfs.q.out +++ b/ql/src/test/results/clientpositive/spark/statsfs.q.out @@ -176,7 +176,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -215,7 +215,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -343,7 +343,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -426,7 +426,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -466,7 +466,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out index a0adcaee08..af3177eefc 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out @@ -61,6 +61,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-0 depends on stages: Stage-2 Stage-4 depends on stages: Stage-0 @@ -68,14 +70,15 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 10 <- Map 9 (GROUP, 1) + Reducer 2 <- Map 11 (PARTITION-LEVEL SORT, 1), Reducer 10 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) - Reducer 9 <- Map 8 (GROUP, 1) + Reducer 5 <- Map 12 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 10 + Map 11 Map Operator Tree: TableScan alias: b @@ -84,7 +87,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) - Map 11 + Map 12 Map Operator Tree: TableScan alias: b @@ -97,7 +100,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 7 Map Operator Tree: TableScan alias: a @@ -119,7 +122,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 8 Map Operator Tree: TableScan alias: s1 @@ -136,7 +139,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 9 Map Operator Tree: TableScan alias: s1 @@ -155,6 +158,26 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Reducer 10 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: 0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -207,6 +230,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Reduce Operator Tree: Join Operator @@ -225,26 +268,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 - Reducer 9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: 0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -259,6 +309,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 + Stage: Stage-0 Move Operator tables: @@ -314,6 +378,8 @@ RUN: Stage-1:MOVE RUN: Stage-0:MOVE RUN: Stage-3:STATS RUN: Stage-4:STATS +RUN: Stage-5:COLUMNSTATS +RUN: Stage-6:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 @@ -491,9 +557,12 @@ INSERT OVERWRITE TABLE src_5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4 + Stage-8 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -501,10 +570,10 @@ STAGE PLANS: Stage: Stage-5 Spark Edges: - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: a @@ -527,7 +596,7 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 5 Map Operator Tree: TableScan alias: s1 @@ -545,12 +614,102 @@ STAGE PLANS: 1 _col0 (type: string) Local Work: Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '2') and key is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 7 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: 0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-6 + Spark + Edges: + Reducer 7 <- Map 6 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator predicate: ((key > '2') and key is null) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -564,7 +723,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 7 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -591,10 +750,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 2 <- Map 8 (GROUP, 1) + Reducer 3 <- Map 9 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 8 Map Operator Tree: TableScan alias: b @@ -610,7 +770,7 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -620,6 +780,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Local Work: + Map Reduce Local Work + Map 9 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Semi Join 0 to 1 @@ -628,7 +808,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 input vertices: - 1 Reducer 6 + 1 Reducer 7 Statistics: Num rows: 500 Data size: 9812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -638,7 +818,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col5 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 550 Data size: 10793 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col5 is null (type: boolean) @@ -656,6 +836,20 @@ STAGE PLANS: Map Reduce Local Work Reducer 2 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 @@ -668,6 +862,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -682,6 +896,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 + Stage: Stage-1 Move Operator tables: @@ -733,11 +961,14 @@ POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:strin POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-5:MAPRED +RUN: Stage-6:MAPRED RUN: Stage-2:MAPRED RUN: Stage-0:MOVE RUN: Stage-1:MOVE RUN: Stage-3:STATS RUN: Stage-4:STATS +RUN: Stage-7:COLUMNSTATS +RUN: Stage-8:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 @@ -746,17 +977,6 @@ POSTHOOK: query: select * from src_4 POSTHOOK: type: QUERY POSTHOOK: Input: default@src_4 #### A masked pattern was here #### -90 val_90 -90 val_90 -90 val_90 -92 val_92 -95 val_95 -95 val_95 -96 val_96 -97 val_97 -97 val_97 -98 val_98 -98 val_98 PREHOOK: query: select * from src_5 PREHOOK: type: QUERY PREHOOK: Input: default@src_5 diff --git a/ql/src/test/results/clientpositive/spark/union10.q.out b/ql/src/test/results/clientpositive/spark/union10.q.out index 2b3afecac7..171595ac9c 100644 --- a/ql/src/test/results/clientpositive/spark/union10.q.out +++ b/ql/src/test/results/clientpositive/spark/union10.q.out @@ -26,14 +26,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -52,7 +54,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -68,7 +70,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 5 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -107,7 +109,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -130,7 +159,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -153,6 +195,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -167,6 +222,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/spark/union12.q.out b/ql/src/test/results/clientpositive/spark/union12.q.out index e9cd26cda3..2c354e788d 100644 --- a/ql/src/test/results/clientpositive/spark/union12.q.out +++ b/ql/src/test/results/clientpositive/spark/union12.q.out @@ -26,14 +26,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -52,7 +54,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -68,7 +70,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 5 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -107,7 +109,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -130,7 +159,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -153,6 +195,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -167,6 +222,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/spark/union17.q.out b/ql/src/test/results/clientpositive/spark/union17.q.out index bcb95e49bd..6152cde00d 100644 --- a/ql/src/test/results/clientpositive/spark/union17.q.out +++ b/ql/src/test/results/clientpositive/spark/union17.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -39,10 +41,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2), Reducer 8 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2), Reducer 9 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 8 <- Map 1 (GROUP, 1) - Reducer 9 <- Map 1 (GROUP, 1) + Reducer 10 <- Map 1 (GROUP, 1) + Reducer 11 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2), Reducer 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2), Reducer 11 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -61,7 +65,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 6 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -81,7 +85,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL - Map 7 + Map 9 Map Operator Tree: TableScan alias: s2 @@ -101,6 +105,50 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + Reducer 10 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + Reducer 11 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL Reducer 3 Reduce Operator Tree: Group By Operator @@ -121,9 +169,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -141,50 +216,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 8 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL - Reducer 9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -199,6 +257,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/union18.q.out b/ql/src/test/results/clientpositive/spark/union18.q.out index dad205098e..6f53198f6f 100644 --- a/ql/src/test/results/clientpositive/spark/union18.q.out +++ b/ql/src/test/results/clientpositive/spark/union18.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -39,7 +41,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Reducer 8 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 1), Reducer 9 (GROUP, 1) + Reducer 8 <- Map 1 (GROUP, 1) + Reducer 9 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -58,7 +63,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 6 Map Operator Tree: TableScan alias: s2 @@ -76,6 +81,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -87,7 +105,61 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 2 + Map 7 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -107,6 +179,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -118,6 +203,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Stage: Stage-0 Move Operator @@ -132,6 +245,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/union19.q.out b/ql/src/test/results/clientpositive/spark/union19.q.out index 31795b2fbf..5822ec75c7 100644 --- a/ql/src/test/results/clientpositive/spark/union19.q.out +++ b/ql/src/test/results/clientpositive/spark/union19.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -39,8 +41,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 4 (GROUP, 2), Reducer 2 (GROUP, 2) + Reducer 10 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 7 (GROUP, 2), Reducer 9 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 8 (GROUP, 1), Reducer 10 (GROUP, 1) + Reducer 9 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -59,7 +64,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 4 + Map 7 Map Operator Tree: TableScan alias: s2 @@ -92,7 +97,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 2 + Map 8 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 10 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -103,30 +134,23 @@ STAGE PLANS: expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator @@ -147,6 +171,82 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: bigint) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Stage: Stage-0 Move Operator @@ -161,6 +261,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/union22.q.out b/ql/src/test/results/clientpositive/spark/union22.q.out index 0aaf4754fe..f28b63dea8 100644 --- a/ql/src/test/results/clientpositive/spark/union22.q.out +++ b/ql/src/test/results/clientpositive/spark/union22.q.out @@ -75,13 +75,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: dst_union22_delta @@ -118,7 +119,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -162,6 +163,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2), Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -208,6 +211,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16), compute_stats(k3, 16), compute_stats(k4, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -219,7 +241,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -260,7 +282,7 @@ STAGE PLANS: name: default.dst_union22_delta Truncated Path -> Alias: /dst_union22_delta/ds=1 [dst_union22_delta] - Map 2 + Map 3 Map Operator Tree: TableScan alias: a @@ -287,7 +309,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col4, _col5 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 182 Data size: 4062 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -324,6 +346,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16), compute_stats(k3, 16), compute_stats(k4, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -337,7 +378,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -378,6 +419,41 @@ STAGE PLANS: name: default.dst_union22 Truncated Path -> Alias: /dst_union22/ds=1 [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -410,6 +486,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k1, k2, k3, k4 + Column Types: string, string, string, string + Table: default.dst_union22 + Is Table Level Stats: false + PREHOOK: query: insert overwrite table dst_union22 partition (ds='2') select * from ( diff --git a/ql/src/test/results/clientpositive/spark/union28.q.out b/ql/src/test/results/clientpositive/spark/union28.q.out index 7ee06fef70..478545f59e 100644 --- a/ql/src/test/results/clientpositive/spark/union28.q.out +++ b/ql/src/test/results/clientpositive/spark/union28.q.out @@ -36,13 +36,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 2) - Reducer 5 <- Map 2 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Reducer 4 (GROUP, 1), Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +67,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Map 2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 3 Map Operator Tree: TableScan alias: src @@ -85,26 +99,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - Reducer 5 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -123,6 +132,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -137,6 +159,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + PREHOOK: query: insert overwrite table union_subq_union select * from ( select key, value from src diff --git a/ql/src/test/results/clientpositive/spark/union29.q.out b/ql/src/test/results/clientpositive/spark/union29.q.out index 05c44d1768..11a48ab51b 100644 --- a/ql/src/test/results/clientpositive/spark/union29.q.out +++ b/ql/src/test/results/clientpositive/spark/union29.q.out @@ -36,10 +36,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -63,48 +66,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Map 2 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - Map 3 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -119,6 +107,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + PREHOOK: query: insert overwrite table union_subq_union select * from ( select key, value from src @@ -163,17 +158,17 @@ POSTHOOK: Input: default@union_subq_union 0 val_0 0 val_0 0 val_0 -0 val_0 -0 val_0 -0 val_0 -2 val_2 2 val_2 2 val_2 4 val_4 4 val_4 -4 val_4 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 +5 val_5 +8 val_8 +8 val_8 +9 val_9 +9 val_9 diff --git a/ql/src/test/results/clientpositive/spark/union30.q.out b/ql/src/test/results/clientpositive/spark/union30.q.out index 12eda1d3b6..3cec27f9b9 100644 --- a/ql/src/test/results/clientpositive/spark/union30.q.out +++ b/ql/src/test/results/clientpositive/spark/union30.q.out @@ -50,13 +50,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 2) - Reducer 5 <- Map 2 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1), Reducer 4 (GROUP, 1), Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -80,7 +81,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Map 2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 3 Map Operator Tree: TableScan alias: src @@ -99,47 +113,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - Reducer 5 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -158,6 +146,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -172,6 +173,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + PREHOOK: query: insert overwrite table union_subq_union select * from ( @@ -229,14 +237,9 @@ POSTHOOK: Input: default@union_subq_union 0 val_0 0 val_0 0 val_0 -0 val_0 -0 val_0 -0 val_0 2 val_2 2 val_2 2 val_2 -2 val_2 -4 val_4 4 val_4 4 val_4 4 val_4 @@ -244,3 +247,8 @@ POSTHOOK: Input: default@union_subq_union 5 val_5 5 val_5 5 val_5 +5 val_5 +8 val_8 +8 val_8 +8 val_8 +9 val_9 diff --git a/ql/src/test/results/clientpositive/spark/union31.q.out b/ql/src/test/results/clientpositive/spark/union31.q.out index 1dc8db2505..d5074e362a 100644 --- a/ql/src/test/results/clientpositive/spark/union31.q.out +++ b/ql/src/test/results/clientpositive/spark/union31.q.out @@ -72,6 +72,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -79,26 +81,28 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 5 (GROUP, 2), Map 7 (GROUP, 2) - Reducer 3 <- Map 6 (GROUP, 2), Map 8 (GROUP, 2) + Reducer 2 <- Map 7 (GROUP, 2), Map 9 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 10 (GROUP, 2), Map 8 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 Map Operator Tree: TableScan - alias: t1 + alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + expressions: _col1 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string) + keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE @@ -108,7 +112,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 6 + Map 7 Map Operator Tree: TableScan alias: t1 @@ -118,12 +122,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col1 (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE @@ -133,22 +137,22 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 7 + Map 8 Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + expressions: _col1 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string) + keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE @@ -158,7 +162,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 8 + Map 9 Map Operator Tree: TableScan alias: t2 @@ -168,12 +172,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col1 (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE @@ -203,9 +207,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -223,6 +254,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: value, cnt + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -237,6 +295,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.t3 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: value, cnt + Column Types: string, int + Table: default.t4 + Stage: Stage-1 Move Operator tables: @@ -354,6 +426,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -362,8 +436,11 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2), Reducer 5 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 4 (GROUP, 2) + Reducer 4 <- Reducer 8 (GROUP, 1) + Reducer 5 <- Reducer 9 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 2) + Reducer 8 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2), Reducer 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 9 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2), Reducer 7 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -387,7 +464,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 + Map 6 Map Operator Tree: TableScan alias: t2 @@ -422,7 +499,49 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 8 Reduce Operator Tree: Forward Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -444,6 +563,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -462,20 +594,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t6 - Reducer 5 + Reducer 9 Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Forward + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -490,6 +635,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t5 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t6 + Stage: Stage-1 Move Operator tables: @@ -649,6 +808,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -657,7 +818,10 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2), Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 7 (GROUP, 1) + Reducer 5 <- Reducer 8 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2), Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2), Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -681,7 +845,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 + Map 6 Map Operator Tree: TableScan alias: t2 @@ -720,7 +884,35 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Reduce Operator Tree: Forward Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE @@ -742,6 +934,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t7 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: count(1) keys: KEY._col0 (type: string) @@ -760,6 +965,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t8 + Reducer 8 + Reduce Operator Tree: + Forward + Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -774,6 +1006,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t7 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t8 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/union33.q.out b/ql/src/test/results/clientpositive/spark/union33.q.out index def5f69305..2058493ef1 100644 --- a/ql/src/test/results/clientpositive/spark/union33.q.out +++ b/ql/src/test/results/clientpositive/spark/union33.q.out @@ -28,13 +28,15 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Reducer 5 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -57,7 +59,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src - Map 2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 3 Map Operator Tree: TableScan alias: src @@ -78,7 +93,21 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -92,7 +121,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 4 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -112,6 +141,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -126,6 +168,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src + PREHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( SELECT key, value FROM src @@ -181,6 +230,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -188,6 +238,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Map 5 (GROUP, 1), Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -211,7 +262,7 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -231,6 +282,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -265,6 +329,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -279,6 +370,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src + PREHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( SELECT key, cast(COUNT(*) as string) AS value FROM src diff --git a/ql/src/test/results/clientpositive/spark/union4.q.out b/ql/src/test/results/clientpositive/spark/union4.q.out index 45705e9997..75673150ab 100644 --- a/ql/src/test/results/clientpositive/spark/union4.q.out +++ b/ql/src/test/results/clientpositive/spark/union4.q.out @@ -22,13 +22,15 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -47,7 +49,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -86,7 +88,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -109,6 +138,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -123,6 +165,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/spark/union6.q.out b/ql/src/test/results/clientpositive/spark/union6.q.out index d419c9a6ac..b1893786e1 100644 --- a/ql/src/test/results/clientpositive/spark/union6.q.out +++ b/ql/src/test/results/clientpositive/spark/union6.q.out @@ -22,12 +22,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 4 (GROUP, 1), Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -46,7 +48,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -63,6 +65,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -82,6 +97,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -96,6 +138,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/spark/union_lateralview.q.out b/ql/src/test/results/clientpositive/spark/union_lateralview.q.out index fe9afb8f88..ce4025cb80 100644 --- a/ql/src/test/results/clientpositive/spark/union_lateralview.q.out +++ b/ql/src/test/results/clientpositive/spark/union_lateralview.q.out @@ -48,12 +48,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -104,7 +106,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -152,7 +154,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -184,6 +186,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_union_lateral_view + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, arr_ele, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(arr_ele, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -198,6 +227,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, arr_ele, value + Column Types: int, int, string + Table: default.test_union_lateral_view + PREHOOK: query: INSERT OVERWRITE TABLE test_union_lateral_view SELECT b.key, d.arr_ele, d.value FROM ( diff --git a/ql/src/test/results/clientpositive/spark/union_top_level.q.out b/ql/src/test/results/clientpositive/spark/union_top_level.q.out index 6adf6c43e5..47da227752 100644 --- a/ql/src/test/results/clientpositive/spark/union_top_level.q.out +++ b/ql/src/test/results/clientpositive/spark/union_top_level.q.out @@ -564,14 +564,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -592,9 +594,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src @@ -612,9 +613,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: src @@ -632,7 +632,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: @@ -655,7 +654,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -676,7 +702,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -697,6 +736,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -711,6 +763,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.union_top + PREHOOK: query: insert into table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a union all @@ -768,14 +827,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -796,9 +857,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src @@ -816,9 +876,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: src @@ -836,7 +895,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: @@ -859,7 +917,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -880,7 +965,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -901,6 +999,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -915,6 +1026,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.union_top + PREHOOK: query: insert overwrite table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a union all diff --git a/ql/src/test/results/clientpositive/spark/vector_char_4.q.out b/ql/src/test/results/clientpositive/spark/vector_char_4.q.out index 943a4b1423..a62370a796 100644 --- a/ql/src/test/results/clientpositive/spark/vector_char_4.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_char_4.q.out @@ -135,10 +135,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -146,38 +149,56 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar - Execution mode: vectorized + Select Operator + expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: char(20)), _col3 (type: char(30)), _col4 (type: char(20)), _col5 (type: char(20)), _col6 (type: char(50)) + outputColumnNames: ct, csi, ci, cb, cf, cd, cs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ct, 16), compute_stats(csi, 16), compute_stats(ci, 16), compute_stats(cb, 16), compute_stats(cf, 16), compute_stats(cd, 16), compute_stats(cs, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -192,3 +213,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: ct, csi, ci, cb, cf, cd, cs + Column Types: char(10), char(10), char(20), char(30), char(20), char(20), char(50) + Table: default.char_lazy_binary_columnar + diff --git a/ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out b/ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out index 1c8e479512..86978b934f 100644 --- a/ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out @@ -135,10 +135,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -146,38 +149,56 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.varchar_lazy_binary_columnar - Execution mode: vectorized + Select Operator + expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: varchar(20)), _col3 (type: varchar(30)), _col4 (type: varchar(20)), _col5 (type: varchar(20)), _col6 (type: varchar(50)) + outputColumnNames: vt, vsi, vi, vb, vf, vd, vs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(vt, 16), compute_stats(vsi, 16), compute_stats(vi, 16), compute_stats(vb, 16), compute_stats(vf, 16), compute_stats(vd, 16), compute_stats(vs, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -192,3 +213,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: vt, vsi, vi, vb, vf, vd, vs + Column Types: varchar(10), varchar(10), varchar(20), varchar(30), varchar(20), varchar(20), varchar(50) + Table: default.varchar_lazy_binary_columnar + diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 2395091f38..39a9d6f493 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -3702,6 +3702,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -3709,11 +3711,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3985,6 +3989,22 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, r, dr, s + Column Types: string, string, int, int, int, double + Table: default.part_4 + Is Table Level Stats: true + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Column Types: string, string, int, int, int, int, double, int + Table: default.part_5 + Is Table Level Stats: true + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/stats0.q.out b/ql/src/test/results/clientpositive/stats0.q.out index 0476acbed1..177a3d5db8 100644 --- a/ql/src/test/results/clientpositive/stats0.q.out +++ b/ql/src/test/results/clientpositive/stats0.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -63,6 +64,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -114,6 +131,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -147,6 +193,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_non_partitioned + Is Table Level Stats: true + PREHOOK: query: insert overwrite table stats_non_partitioned select * from src PREHOOK: type: QUERY @@ -697,6 +751,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -717,6 +772,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.stats_partitioned + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -733,6 +822,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_partitioned + PREHOOK: query: insert overwrite table stats_partitioned partition (ds='1') select * from src PREHOOK: type: QUERY @@ -1332,6 +1428,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1380,6 +1477,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1431,6 +1544,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1473,6 +1615,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_non_partitioned + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2205,6 +2355,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -2228,6 +2379,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.stats_partitioned + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -2253,6 +2438,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_partitioned + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/stats1.q.out b/ql/src/test/results/clientpositive/stats1.q.out index 2d5b4f8345..e56f8d31bd 100644 --- a/ql/src/test/results/clientpositive/stats1.q.out +++ b/ql/src/test/results/clientpositive/stats1.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -75,6 +76,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -92,6 +106,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -106,6 +146,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable + PREHOOK: query: INSERT OVERWRITE TABLE tmptable SELECT unionsrc.key, unionsrc.value FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 @@ -178,7 +225,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 26 rawDataSize 199 diff --git a/ql/src/test/results/clientpositive/stats10.q.out b/ql/src/test/results/clientpositive/stats10.q.out index d1fe47393b..9e95d044ac 100644 --- a/ql/src/test/results/clientpositive/stats10.q.out +++ b/ql/src/test/results/clientpositive/stats10.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,6 +50,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket3_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -64,6 +82,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src PREHOOK: type: QUERY @@ -414,7 +468,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -453,7 +507,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats14.q.out b/ql/src/test/results/clientpositive/stats14.q.out index 85017462c3..1cd660cd67 100644 --- a/ql/src/test/results/clientpositive/stats14.q.out +++ b/ql/src/test/results/clientpositive/stats14.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats15.q.out b/ql/src/test/results/clientpositive/stats15.q.out index 85017462c3..1cd660cd67 100644 --- a/ql/src/test/results/clientpositive/stats15.q.out +++ b/ql/src/test/results/clientpositive/stats15.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats18.q.out b/ql/src/test/results/clientpositive/stats18.q.out index 4945808098..de8918a40a 100644 --- a/ql/src/test/results/clientpositive/stats18.q.out +++ b/ql/src/test/results/clientpositive/stats18.q.out @@ -39,7 +39,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats4.q.out b/ql/src/test/results/clientpositive/stats4.q.out index 8f503a90b8..b4f04e1482 100644 --- a/ql/src/test/results/clientpositive/stats4.q.out +++ b/ql/src/test/results/clientpositive/stats4.q.out @@ -48,16 +48,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -81,6 +79,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -96,6 +110,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -122,6 +170,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -152,15 +214,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -181,31 +234,30 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' @@ -2305,7 +2357,7 @@ Database: default Table: nzhang_part1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2345,7 +2397,7 @@ Database: default Table: nzhang_part1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2385,7 +2437,7 @@ Database: default Table: nzhang_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2425,7 +2477,7 @@ Database: default Table: nzhang_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out b/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out index b855b3896e..3cafd543b5 100644 --- a/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -42,6 +43,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, part + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,6 +102,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.tmptable + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/stats_list_bucket.q.out b/ql/src/test/results/clientpositive/stats_list_bucket.q.out index 0c43b1bb89..e7b7b26235 100644 --- a/ql/src/test/results/clientpositive/stats_list_bucket.q.out +++ b/ql/src/test/results/clientpositive/stats_list_bucket.q.out @@ -61,7 +61,7 @@ Database: default Table: stats_list_bucket #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 @@ -133,7 +133,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 diff --git a/ql/src/test/results/clientpositive/stats_only_null.q.out b/ql/src/test/results/clientpositive/stats_only_null.q.out index 88c2114356..c537da5a8b 100644 --- a/ql/src/test/results/clientpositive/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/stats_only_null.q.out @@ -73,46 +73,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_null - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -123,46 +89,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_null_part - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/stats_partial_size.q.out b/ql/src/test/results/clientpositive/stats_partial_size.q.out index c779741314..f9fe2f90f6 100644 --- a/ql/src/test/results/clientpositive/stats_partial_size.q.out +++ b/ql/src/test/results/clientpositive/stats_partial_size.q.out @@ -47,16 +47,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sample_partitioned - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: x (type: int), y (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) TableScan alias: sample @@ -81,10 +81,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/statsfs.q.out b/ql/src/test/results/clientpositive/statsfs.q.out index d070e9aa6f..16da73418f 100644 --- a/ql/src/test/results/clientpositive/statsfs.q.out +++ b/ql/src/test/results/clientpositive/statsfs.q.out @@ -176,7 +176,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -215,7 +215,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -343,7 +343,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -426,7 +426,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -466,7 +466,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/subquery_multiinsert.q.out index 28c82b85d2..f7bad8b093 100644 --- a/ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -58,18 +58,21 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 + Stage-11 is a root stage + Stage-2 depends on stages: Stage-11 Stage-3 depends on stages: Stage-2 Stage-4 depends on stages: Stage-3 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 + Stage-12 depends on stages: Stage-5, Stage-7, Stage-8 + Stage-13 depends on stages: Stage-5, Stage-7, Stage-8 Stage-6 depends on stages: Stage-2 Stage-0 depends on stages: Stage-6 Stage-7 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -221,6 +224,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -235,6 +258,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 + Stage: Stage-6 Map Reduce Map Operator Tree: @@ -281,6 +318,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -295,6 +347,28 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b INSERT OVERWRITE TABLE src_4 @@ -332,15 +406,18 @@ POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, c POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED +RUN: Stage-11:MAPRED RUN: Stage-2:MAPRED RUN: Stage-3:MAPRED RUN: Stage-6:MAPRED RUN: Stage-4:MAPRED RUN: Stage-0:MOVE +RUN: Stage-8:MAPRED RUN: Stage-1:MOVE RUN: Stage-7:STATS RUN: Stage-5:STATS +RUN: Stage-12:COLUMNSTATS +RUN: Stage-13:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 @@ -487,7 +564,7 @@ POSTHOOK: Input: default@src_5 199 val_199 199 val_199 2 val_2 -Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-14:MAPRED' is a cross product Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain from src b @@ -518,22 +595,25 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-14 depends on stages: Stage-10 , consists of Stage-17, Stage-2 - Stage-17 has a backup stage: Stage-2 - Stage-13 depends on stages: Stage-17 - Stage-15 depends on stages: Stage-2, Stage-13 - Stage-4 depends on stages: Stage-15 + Stage-11 is a root stage + Stage-15 depends on stages: Stage-11 , consists of Stage-18, Stage-2 + Stage-18 has a backup stage: Stage-2 + Stage-14 depends on stages: Stage-18 + Stage-16 depends on stages: Stage-2, Stage-14 + Stage-4 depends on stages: Stage-16 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 - Stage-16 depends on stages: Stage-2, Stage-13 - Stage-12 depends on stages: Stage-16 - Stage-0 depends on stages: Stage-12 + Stage-19 depends on stages: Stage-5, Stage-7, Stage-8 + Stage-20 depends on stages: Stage-5, Stage-7, Stage-8 + Stage-17 depends on stages: Stage-2, Stage-14 + Stage-13 depends on stages: Stage-17 + Stage-0 depends on stages: Stage-13 Stage-7 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-13 Stage-2 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -576,10 +656,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-14 + Stage: Stage-15 Conditional Operator - Stage: Stage-17 + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -593,7 +673,7 @@ STAGE PLANS: 0 1 - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -625,7 +705,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-15 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: sq_2:s1 @@ -687,6 +767,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -701,7 +801,21 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator - Stage: Stage-16 + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-20 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 + + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: sq_1:a @@ -729,7 +843,7 @@ STAGE PLANS: 0 key (type: string), value (type: string) 1 _col0 (type: string), _col1 (type: string) - Stage: Stage-12 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -749,6 +863,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -765,6 +894,28 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Map Reduce Map Operator Tree: @@ -804,7 +955,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-14:MAPRED' is a cross product Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b INSERT OVERWRITE TABLE src_4 @@ -842,18 +993,21 @@ POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, c POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED -RUN: Stage-14:CONDITIONAL -RUN: Stage-17:MAPREDLOCAL -RUN: Stage-13:MAPRED -RUN: Stage-15:MAPREDLOCAL +RUN: Stage-11:MAPRED +RUN: Stage-15:CONDITIONAL +RUN: Stage-18:MAPREDLOCAL +RUN: Stage-14:MAPRED RUN: Stage-16:MAPREDLOCAL +RUN: Stage-17:MAPREDLOCAL RUN: Stage-4:MAPRED -RUN: Stage-12:MAPRED +RUN: Stage-13:MAPRED RUN: Stage-1:MOVE RUN: Stage-0:MOVE +RUN: Stage-8:MAPRED RUN: Stage-5:STATS RUN: Stage-7:STATS +RUN: Stage-19:COLUMNSTATS +RUN: Stage-20:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out index 6602222ed7..0675ed5f53 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out @@ -120,21 +120,34 @@ POSTHOOK: query: explain analyze insert overwrite table t select key from src POSTHOOK: type: QUERY Plan optimized by CBO. -Stage-3 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.t"} - Stage-2 - Dependency Collection{} - Stage-1 - Map 1 - File Output Operator [FS_2] - table:{"name:":"default.t"} - Select Operator [SEL_1] (rows=500/500 width=87) - Output:["_col0"] - TableScan [TS_0] (rows=500/500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] +Vertex dependency in root stage +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + +Stage-4 + Column Stats Work{} + Stage-3 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.t"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 2 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=480) + Output:["_col0"],aggregations:["compute_stats(VALUE._col0, 16)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_2] + table:{"name:":"default.t"} + Select Operator [SEL_1] (rows=500/500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500/500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=87) + Output:["key"] + Please refer to the previous Select Operator [SEL_1] PREHOOK: query: select key from src limit 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out index f6844c4a38..61bc10fb33 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out @@ -645,20 +645,20 @@ Stage-0 Stage-1 Map 1 File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=266/480 width=18) + Merge Join Operator [MERGEJOIN_15] (rows=500/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242/242 width=18) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_14] (rows=242/242 width=18) + Filter Operator [FIL_14] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=18) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242/242 width=18) + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_13] (rows=242/242 width=18) + Filter Operator [FIL_13] (rows=242/242 width=95) predicate:key is not null - TableScan [TS_0] (rows=242/242 width=18) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value PREHOOK: type: QUERY @@ -687,34 +687,34 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_27] (rows=292/1166 width=18) + Merge Join Operator [MERGEJOIN_27] (rows=1080/1166 width=95) Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_25] (rows=266/480 width=18) + Merge Join Operator [MERGEJOIN_25] (rows=500/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242/242 width=18) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_23] (rows=242/242 width=18) + Filter Operator [FIL_23] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=18) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242/242 width=18) + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=242/242 width=18) + Filter Operator [FIL_22] (rows=242/242 width=95) predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=18) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 4 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col1 - Select Operator [SEL_8] (rows=242/242 width=18) + Select Operator [SEL_8] (rows=242/242 width=91) Output:["_col1"] - Filter Operator [FIL_24] (rows=242/242 width=18) + Filter Operator [FIL_24] (rows=242/242 width=91) predicate:value is not null - TableScan [TS_6] (rows=242/242 width=18) - default@tab,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + TableScan [TS_6] (rows=242/242 width=91) + default@tab,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key PREHOOK: type: QUERY @@ -744,20 +744,20 @@ Stage-0 Stage-1 Map 1 File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=266/480 width=18) + Merge Join Operator [MERGEJOIN_15] (rows=500/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242/242 width=18) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_14] (rows=242/242 width=18) + Filter Operator [FIL_14] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=18) - default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242/242 width=18) + TableScan [TS_3] (rows=242/242 width=4) + default@tab2,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_13] (rows=242/242 width=18) + Filter Operator [FIL_13] (rows=242/242 width=95) predicate:key is not null - TableScan [TS_0] (rows=242/242 width=18) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key join tab2 s2 on s1.value=s2.value PREHOOK: type: QUERY @@ -790,34 +790,34 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_27] (rows=292/1166 width=18) + Merge Join Operator [MERGEJOIN_27] (rows=1080/1166 width=95) Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_25] (rows=266/480 width=18) + Merge Join Operator [MERGEJOIN_25] (rows=500/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242/242 width=18) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_23] (rows=242/242 width=18) + Filter Operator [FIL_23] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=18) - default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242/242 width=18) + TableScan [TS_3] (rows=242/242 width=4) + default@tab2,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=242/242 width=18) + Filter Operator [FIL_22] (rows=242/242 width=95) predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=18) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 4 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col1 - Select Operator [SEL_8] (rows=242/242 width=18) + Select Operator [SEL_8] (rows=242/242 width=91) Output:["_col1"] - Filter Operator [FIL_24] (rows=242/242 width=18) + Filter Operator [FIL_24] (rows=242/242 width=91) predicate:value is not null - TableScan [TS_6] (rows=242/242 width=18) - default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + TableScan [TS_6] (rows=242/242 width=91) + default@tab2,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key UNION ALL @@ -869,43 +869,43 @@ Stage-0 Output:["_col0"],aggregations:["count()"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_22] - Merge Join Operator [MERGEJOIN_38] (rows=558/1646 width=18) + Merge Join Operator [MERGEJOIN_38] (rows=1892/1646 width=8) Conds:Union 2._col0=RS_19._col0(Inner) <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=500/500 width=18) + Select Operator [SEL_17] (rows=500/500 width=4) Output:["_col0"] - Filter Operator [FIL_35] (rows=500/500 width=18) + Filter Operator [FIL_35] (rows=500/500 width=4) predicate:key is not null - TableScan [TS_15] (rows=500/500 width=18) - default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key"] + TableScan [TS_15] (rows=500/500 width=4) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Union 2 [SIMPLE_EDGE] <-Map 1 [CONTAINS] Reduce Output Operator [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_36] (rows=266/480 width=18) + Merge Join Operator [MERGEJOIN_36] (rows=500/480 width=4) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0"] - <-Select Operator [SEL_5] (rows=242/242 width=18) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_33] (rows=242/242 width=18) + Filter Operator [FIL_33] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=18) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242/242 width=18) + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_32] (rows=242/242 width=18) + Filter Operator [FIL_32] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_0] (rows=242/242 width=18) + TableScan [TS_0] (rows=242/242 width=4) Output:["key"] <-Map 6 [CONTAINS] Reduce Output Operator [RS_18] PartitionCols:_col0 - Select Operator [SEL_12] (rows=242/242 width=18) + Select Operator [SEL_12] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=242/242 width=18) + Filter Operator [FIL_34] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_10] (rows=242/242 width=18) + TableScan [TS_10] (rows=242/242 width=4) Output:["key"] PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value @@ -958,58 +958,58 @@ Stage-0 Output:["_col0"],aggregations:["count()"] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_28] - Merge Join Operator [MERGEJOIN_50] (rows=587/3768 width=18) + Merge Join Operator [MERGEJOIN_50] (rows=3372/3768 width=8) Conds:Union 3._col0=RS_25._col0(Inner) <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0 - Select Operator [SEL_23] (rows=500/500 width=18) + Select Operator [SEL_23] (rows=500/500 width=4) Output:["_col0"] - Filter Operator [FIL_46] (rows=500/500 width=18) + Filter Operator [FIL_46] (rows=500/500 width=4) predicate:key is not null - TableScan [TS_21] (rows=500/500 width=18) - default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key"] + TableScan [TS_21] (rows=500/500 width=4) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Union 3 [SIMPLE_EDGE] <-Map 8 [CONTAINS] Reduce Output Operator [RS_24] PartitionCols:_col0 - Select Operator [SEL_18] (rows=242/242 width=18) + Select Operator [SEL_18] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_45] (rows=242/242 width=18) + Filter Operator [FIL_45] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_16] (rows=242/242 width=18) + TableScan [TS_16] (rows=242/242 width=4) Output:["key"] <-Reducer 2 [CONTAINS] Reduce Output Operator [RS_24] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_49] (rows=292/1166 width=18) + Merge Join Operator [MERGEJOIN_49] (rows=1080/1166 width=4) Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_47] (rows=266/480 width=18) + Merge Join Operator [MERGEJOIN_47] (rows=500/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242/242 width=18) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_43] (rows=242/242 width=18) + Filter Operator [FIL_43] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=18) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242/242 width=18) + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_42] (rows=242/242 width=18) + Filter Operator [FIL_42] (rows=242/242 width=95) predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=18) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col1 - Select Operator [SEL_8] (rows=242/242 width=18) + Select Operator [SEL_8] (rows=242/242 width=91) Output:["_col1"] - Filter Operator [FIL_44] (rows=242/242 width=18) + Filter Operator [FIL_44] (rows=242/242 width=91) predicate:value is not null - TableScan [TS_6] (rows=242/242 width=18) - default@tab,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + TableScan [TS_6] (rows=242/242 width=91) + default@tab,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -1756,64 +1756,89 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 5 - File Output Operator [FS_19] - table:{"name:":"default.dest1"} - Select Operator [SEL_17] (rows=205/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=205/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0 - Group By Operator [GBY_12] (rows=501/310 width=272) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_11] - PartitionCols:_col0, _col1 - Select Operator [SEL_7] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_11] - PartitionCols:_col0, _col1 - Select Operator [SEL_5] (rows=1/1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count(1)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_3] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - File Output Operator [FS_25] - table:{"name:":"default.dest2"} - Select Operator [SEL_23] (rows=501/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_22] (rows=501/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 - Please refer to the previous Group By Operator [GBY_12] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest1"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 6 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_19] + table:{"name:":"default.dest1"} + Select Operator [SEL_17] (rows=205/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_16] (rows=205/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_15] + PartitionCols:_col0 + Group By Operator [GBY_12] (rows=501/310 width=272) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_11] + PartitionCols:_col0, _col1 + Select Operator [SEL_7] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_11] + PartitionCols:_col0, _col1 + Select Operator [SEL_5] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count(1)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Select Operator [SEL_1] (rows=500/310 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + File Output Operator [FS_25] + table:{"name:":"default.dest2"} + Select Operator [SEL_23] (rows=501/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_22] (rows=501/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 + Please refer to the previous Group By Operator [GBY_12] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=501/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_23] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=205/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_17] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)"] + <- Please refer to the previous Reducer 4 [CUSTOM_SIMPLE_EDGE] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.dest2"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 PREHOOK: query: FROM UNIQUEJOIN PRESERVE src a (a.key), PRESERVE src1 b (b.key), PRESERVE srcpart c (c.key) SELECT a.key, b.key, c.key PREHOOK: type: QUERY @@ -1987,77 +2012,101 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) -Map 7 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) +Map 9 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 4 - File Output Operator [FS_18] - table:{"name:":"default.dest1"} - Select Operator [SEL_16] (rows=205/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=205/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Select Operator [SEL_7] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_20] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_7] - <-Map 7 [CONTAINS] - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_10] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_20] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_11] - <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=1/1 width=272) +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest1"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 5 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_18] + table:{"name:":"default.dest1"} + Select Operator [SEL_16] (rows=205/310 width=272) Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count(1)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_3] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_20] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_5] - Reducer 5 - File Output Operator [FS_24] - table:{"name:":"default.dest2"} - Select Operator [SEL_22] (rows=1001/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_21] (rows=1001/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 + Group By Operator [GBY_15] (rows=205/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_7] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_20] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_7] + <-Map 9 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_10] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_20] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_11] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count(1)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Select Operator [SEL_1] (rows=500/310 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_20] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_5] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=205/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_16] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_24] + table:{"name:":"default.dest2"} + Select Operator [SEL_22] (rows=1001/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_21] (rows=1001/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=1001/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_22] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.dest2"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 PREHOOK: query: FROM ( select 'tst1' as key, cast(count(1) as string) as value, 'tst1' as value2 from src s1 @@ -2106,64 +2155,88 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 4 - File Output Operator [FS_15] - table:{"name:":"default.dest1"} - Select Operator [SEL_13] (rows=205/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_12] (rows=205/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_11] - PartitionCols:_col0 - Select Operator [SEL_7] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_17] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_7] - <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_11] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=1/1 width=272) +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest1"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 5 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_15] + table:{"name:":"default.dest1"} + Select Operator [SEL_13] (rows=205/310 width=272) Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count(1)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_3] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_17] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_5] - Reducer 5 - File Output Operator [FS_21] - table:{"name:":"default.dest2"} - Select Operator [SEL_19] (rows=501/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_18] (rows=501/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 + Group By Operator [GBY_12] (rows=205/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_11] + PartitionCols:_col0 + Select Operator [SEL_7] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_17] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_7] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_11] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count(1)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Select Operator [SEL_1] (rows=500/310 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_17] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_5] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=205/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_13] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_21] + table:{"name:":"default.dest2"} + Select Operator [SEL_19] (rows=501/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_18] (rows=501/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=501/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_19] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.dest2"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index e5c8d6c51e..5d270a4c1c 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -652,35 +652,48 @@ POSTHOOK: query: explain analyze insert overwrite table orc_merge5 select userid POSTHOOK: type: QUERY Plan optimized by CBO. -Stage-3 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.orc_merge5"} - Stage-2 - Dependency Collection{} - Stage-5(CONDITIONAL) - Move Operator - Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) - Conditional Operator - Stage-1 - Map 1 - File Output Operator [FS_3] - table:{"name:":"default.orc_merge5"} - Select Operator [SEL_2] (rows=306/3 width=268) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_4] (rows=306/3 width=268) - predicate:(userid <= 13) - TableScan [TS_0] (rows=919/15000 width=268) - default@orc_merge5,orc_merge5,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] - Stage-4(CONDITIONAL) - File Merge - Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) - Stage-7 - Move Operator - Stage-6(CONDITIONAL) +Vertex dependency in root stage +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + +Stage-9 + Column Stats Work{} + Stage-3 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.orc_merge5"} + Stage-2 + Dependency Collection{} + Stage-5(CONDITIONAL) + Move Operator + Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) + Conditional Operator + Stage-1 + Reducer 2 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=2608) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)","compute_stats(VALUE._col4, 16)","compute_stats(VALUE._col5, 16)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_3] + table:{"name:":"default.orc_merge5"} + Select Operator [SEL_2] (rows=306/3 width=268) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_4] (rows=306/3 width=268) + predicate:(userid <= 13) + TableScan [TS_0] (rows=919/15000 width=268) + default@orc_merge5,orc_merge5,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=306/3 width=268) + Output:["userid","string1","subtype","decimal1","ts"] + Please refer to the previous Select Operator [SEL_2] + Stage-4(CONDITIONAL) File Merge Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) + Stage-7 + Move Operator + Stage-6(CONDITIONAL) + File Merge + Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) PREHOOK: query: drop table orc_merge5 PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out index 0a1e039cf1..a102b64e97 100644 --- a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out @@ -741,14 +741,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -757,43 +757,43 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -857,14 +857,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -873,44 +873,44 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -966,67 +966,72 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE + Map 4 Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1082,68 +1087,72 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE + Map 4 Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out b/ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out index 82c09faa5c..7f7d354b6c 100644 --- a/ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out +++ b/ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out @@ -41,19 +41,19 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_9] Group By Operator [GBY_8] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","count(_col1)","count(_col2)"] - Select Operator [SEL_6] (rows=13 width=5) + Select Operator [SEL_6] (rows=13 width=97) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_5] (rows=13 width=5) + Group By Operator [GBY_5] (rows=13 width=97) Output:["_col0","_col1","_col2","_col3"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_3] (rows=27 width=5) + Group By Operator [GBY_3] (rows=13 width=97) Output:["_col0","_col1","_col2","_col3"],keys:_col0, _col1, _col2, 0 - Select Operator [SEL_1] (rows=9 width=5) + Select Operator [SEL_1] (rows=9 width=93) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=9 width=5) - default@employee,employee,Tbl:COMPLETE,Col:NONE,Output:["department_id","gender","education_level"] + TableScan [TS_0] (rows=9 width=93) + default@employee,employee,Tbl:COMPLETE,Col:COMPLETE,Output:["department_id","gender","education_level"] PREHOOK: query: select count(distinct department_id), count(distinct gender), count(distinct education_level) from employee PREHOOK: type: QUERY @@ -113,7 +113,7 @@ Stage-0 Stage-1 Reducer 3 File Output Operator [FS_12] - Select Operator [SEL_11] (rows=1 width=40) + Select Operator [SEL_11] (rows=1 width=56) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] Group By Operator [GBY_10] (rows=1 width=40) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","count(VALUE._col4)"] @@ -121,19 +121,19 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_9] Group By Operator [GBY_8] (rows=1 width=40) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["count(_col0)","count(_col1)","count(_col2)","count(_col3)","count(_col4)"] - Select Operator [SEL_6] (rows=22 width=5) + Select Operator [SEL_6] (rows=22 width=97) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_5] (rows=22 width=5) + Group By Operator [GBY_5] (rows=22 width=97) Output:["_col0","_col1","_col2","_col3"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_3] (rows=45 width=5) + Group By Operator [GBY_3] (rows=22 width=97) Output:["_col0","_col1","_col2","_col3"],keys:_col0, _col1, _col2, 0 - Select Operator [SEL_1] (rows=9 width=5) + Select Operator [SEL_1] (rows=9 width=93) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=9 width=5) - default@employee,employee,Tbl:COMPLETE,Col:NONE,Output:["gender","department_id","education_level"] + TableScan [TS_0] (rows=9 width=93) + default@employee,employee,Tbl:COMPLETE,Col:COMPLETE,Output:["gender","department_id","education_level"] PREHOOK: query: select count(distinct gender), count(distinct department_id), count(distinct gender), count(distinct education_level), count(distinct education_level, department_id), count(distinct department_id, education_level), count(distinct department_id, education_level, gender) from employee diff --git a/ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out b/ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out index e09cbb91e5..161c14e5d2 100644 --- a/ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out @@ -51,7 +51,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: non_string_part - Statistics: Num rows: 3073 Data size: 351442 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3073 Data size: 24584 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4] @@ -61,7 +61,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (cint > 0) (type: boolean) - Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), ctinyint (type: tinyint) outputColumnNames: _col0, _col1 @@ -69,7 +69,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 4] - Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -106,19 +106,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -174,7 +174,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: non_string_part - Statistics: Num rows: 3073 Data size: 363734 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3073 Data size: 313446 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4] @@ -184,7 +184,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (cint > 0) (type: boolean) - Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cstring1 (type: string) outputColumnNames: _col0, _col1 @@ -192,7 +192,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -228,19 +228,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/truncate_column.q.out b/ql/src/test/results/clientpositive/truncate_column.q.out index cc3bc89cfe..7ff963cdc2 100644 --- a/ql/src/test/results/clientpositive/truncate_column.q.out +++ b/ql/src/test/results/clientpositive/truncate_column.q.out @@ -36,7 +36,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 10 rawDataSize 94 @@ -297,7 +297,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} #### A masked pattern was here #### numFiles 1 numRows 10 @@ -495,7 +495,7 @@ Database: default Table: test_tab_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 10 rawDataSize 94 diff --git a/ql/src/test/results/clientpositive/udf1.q.out b/ql/src/test/results/clientpositive/udf1.q.out index eebd90f891..91352af493 100644 --- a/ql/src/test/results/clientpositive/udf1.q.out +++ b/ql/src/test/results/clientpositive/udf1.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -64,6 +65,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16), compute_stats(c12, 16), compute_stats(c13, 16), compute_stats(c14, 16), compute_stats(c15, 16), compute_stats(c16, 16), compute_stats(c17, 16), compute_stats(c18, 16), compute_stats(c19, 16), compute_stats(c20, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 9840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 9840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: struct), _col16 (type: struct), _col17 (type: struct), _col18 (type: struct), _col19 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8), compute_stats(VALUE._col9), compute_stats(VALUE._col10), compute_stats(VALUE._col11), compute_stats(VALUE._col12), compute_stats(VALUE._col13), compute_stats(VALUE._col14), compute_stats(VALUE._col15), compute_stats(VALUE._col16), compute_stats(VALUE._col17), compute_stats(VALUE._col18), compute_stats(VALUE._col19) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 9840 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 9840 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +114,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20 + Column Types: string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/udf3.q.out b/ql/src/test/results/clientpositive/udf3.q.out index 96038f12af..3b59e7f3f2 100644 --- a/ql/src/test/results/clientpositive/udf3.q.out +++ b/ql/src/test/results/clientpositive/udf3.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -55,6 +56,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: c1, c2, c3, c4, c5 + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +90,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5 + Column Types: string, string, string, string, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(CAST('' AS INT)), sum(CAST('' AS INT)), avg(CAST('' AS INT)), min(CAST('' AS INT)), max(CAST('' AS INT)) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/udf_10_trims.q.out b/ql/src/test/results/clientpositive/udf_10_trims.q.out index 3a5303adfe..746a2aefaa 100644 --- a/ql/src/test/results/clientpositive/udf_10_trims.q.out +++ b/ql/src/test/results/clientpositive/udf_10_trims.q.out @@ -24,6 +24,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -50,6 +51,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -73,6 +100,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/udf_character_length.q.out b/ql/src/test/results/clientpositive/udf_character_length.q.out index 332ec95644..a85225f5f0 100644 --- a/ql/src/test/results/clientpositive/udf_character_length.q.out +++ b/ql/src/test/results/clientpositive/udf_character_length.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -71,6 +72,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -94,6 +121,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/udf_length.q.out b/ql/src/test/results/clientpositive/udf_length.q.out index fc795bbcf4..0da98e2eef 100644 --- a/ql/src/test/results/clientpositive/udf_length.q.out +++ b/ql/src/test/results/clientpositive/udf_length.q.out @@ -31,6 +31,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -54,6 +55,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -77,6 +104,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/udf_octet_length.q.out b/ql/src/test/results/clientpositive/udf_octet_length.q.out index f8738f813c..af648fb698 100644 --- a/ql/src/test/results/clientpositive/udf_octet_length.q.out +++ b/ql/src/test/results/clientpositive/udf_octet_length.q.out @@ -31,6 +31,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -54,6 +55,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -77,6 +104,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/udf_reverse.q.out b/ql/src/test/results/clientpositive/udf_reverse.q.out index 28b0c9f197..9257b2c8f0 100644 --- a/ql/src/test/results/clientpositive/udf_reverse.q.out +++ b/ql/src/test/results/clientpositive/udf_reverse.q.out @@ -31,6 +31,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -54,6 +55,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -77,6 +104,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union10.q.out b/ql/src/test/results/clientpositive/union10.q.out index 85eabf5e9c..5337e53b4c 100644 --- a/ql/src/test/results/clientpositive/union10.q.out +++ b/ql/src/test/results/clientpositive/union10.q.out @@ -29,6 +29,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -88,6 +89,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -103,6 +117,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -118,6 +145,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -141,6 +194,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union12.q.out b/ql/src/test/results/clientpositive/union12.q.out index 1b02d16739..30654a9c25 100644 --- a/ql/src/test/results/clientpositive/union12.q.out +++ b/ql/src/test/results/clientpositive/union12.q.out @@ -29,6 +29,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -88,6 +89,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -103,6 +117,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -118,6 +145,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -141,6 +194,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union17.q.out b/ql/src/test/results/clientpositive/union17.q.out index 18e06e1eeb..7398d9bf88 100644 --- a/ql/src/test/results/clientpositive/union17.q.out +++ b/ql/src/test/results/clientpositive/union17.q.out @@ -33,9 +33,13 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8 Stage-5 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-2 @@ -152,6 +156,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -166,11 +185,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -194,6 +249,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -205,9 +275,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc diff --git a/ql/src/test/results/clientpositive/union18.q.out b/ql/src/test/results/clientpositive/union18.q.out index caa664c2f9..2a78eaa0d3 100644 --- a/ql/src/test/results/clientpositive/union18.q.out +++ b/ql/src/test/results/clientpositive/union18.q.out @@ -35,16 +35,14 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-13 depends on stages: Stage-4, Stage-10, Stage-11 + Stage-14 depends on stages: Stage-4, Stage-10, Stage-11 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 + Stage-1 depends on stages: Stage-3 Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-11 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -96,6 +94,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -107,6 +118,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -125,6 +151,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -136,6 +175,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -159,6 +226,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -189,15 +270,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -215,31 +287,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/union19.q.out b/ql/src/test/results/clientpositive/union19.q.out index 5ce5905863..a002275e14 100644 --- a/ql/src/test/results/clientpositive/union19.q.out +++ b/ql/src/test/results/clientpositive/union19.q.out @@ -33,8 +33,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -101,6 +105,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -134,6 +153,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -153,6 +187,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -167,6 +216,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -177,9 +262,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc diff --git a/ql/src/test/results/clientpositive/union22.q.out b/ql/src/test/results/clientpositive/union22.q.out index 9134bdf19f..0de010295f 100644 --- a/ql/src/test/results/clientpositive/union22.q.out +++ b/ql/src/test/results/clientpositive/union22.q.out @@ -77,6 +77,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-4, Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 STAGE PLANS: @@ -97,7 +98,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -225,7 +226,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -272,7 +273,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -361,6 +362,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16), compute_stats(k3, 16), compute_stats(k4, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false TableScan GatherStats: false Union @@ -395,6 +415,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16), compute_stats(k3, 16), compute_stats(k4, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -428,7 +467,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -470,6 +509,40 @@ STAGE PLANS: Truncated Path -> Alias: /dst_union22_delta/ds=1 [null-subquery1:$hdt$_0-subquery1:dst_union22_delta] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -502,6 +575,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: k1, k2, k3, k4 + Column Types: string, string, string, string + Table: default.dst_union22 + Is Table Level Stats: false + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -558,7 +639,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -605,7 +686,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 diff --git a/ql/src/test/results/clientpositive/union28.q.out b/ql/src/test/results/clientpositive/union28.q.out index c3789d08e9..0d9ff2fc27 100644 --- a/ql/src/test/results/clientpositive/union28.q.out +++ b/ql/src/test/results/clientpositive/union28.q.out @@ -39,6 +39,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -102,6 +103,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -117,6 +131,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -132,6 +159,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -155,6 +208,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union29.q.out b/ql/src/test/results/clientpositive/union29.q.out index 87ba275b87..db92f3cdfa 100644 --- a/ql/src/test/results/clientpositive/union29.q.out +++ b/ql/src/test/results/clientpositive/union29.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -67,6 +68,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -88,6 +102,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -109,6 +136,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -132,6 +185,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union30.q.out b/ql/src/test/results/clientpositive/union30.q.out index 26a27c8e15..d033a51e05 100644 --- a/ql/src/test/results/clientpositive/union30.q.out +++ b/ql/src/test/results/clientpositive/union30.q.out @@ -53,6 +53,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -116,6 +117,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE @@ -131,6 +145,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE @@ -146,6 +173,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -167,6 +207,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -190,6 +256,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union31.q.out b/ql/src/test/results/clientpositive/union31.q.out index 4c26c7b6e9..b04b1eae56 100644 --- a/ql/src/test/results/clientpositive/union31.q.out +++ b/ql/src/test/results/clientpositive/union31.q.out @@ -72,9 +72,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-10 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -181,6 +185,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -195,11 +214,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.t3 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: value, cnt + Column Types: string, int + Table: default.t4 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) @@ -224,6 +279,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: value, cnt + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -235,9 +305,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t4 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from (select * from t1 union all @@ -340,12 +432,16 @@ insert overwrite table t6 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2, Stage-6 + Stage-3 depends on stages: Stage-2, Stage-8 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - Stage-6 is a root stage + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 + Stage-8 is a root stage STAGE PLANS: Stage: Stage-2 @@ -426,6 +522,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -444,6 +555,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -458,6 +584,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t5 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t6 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -468,10 +630,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t6 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator - Stage: Stage-6 + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -654,8 +838,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -752,6 +940,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t7 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(1) keys: KEY._col0 (type: string) @@ -770,6 +973,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t8 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -784,6 +1002,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t7 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t8 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -794,9 +1048,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t8 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from ( select key as c1, count(1) as cnt from t1 group by key diff --git a/ql/src/test/results/clientpositive/union33.q.out b/ql/src/test/results/clientpositive/union33.q.out index 17aeecd254..e9ad715fbf 100644 --- a/ql/src/test/results/clientpositive/union33.q.out +++ b/ql/src/test/results/clientpositive/union33.q.out @@ -32,6 +32,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -124,6 +125,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -135,6 +149,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -158,6 +198,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -247,6 +294,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-4 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 @@ -330,6 +378,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -350,6 +411,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -373,6 +460,13 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src + Stage: Stage-5 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union4.q.out b/ql/src/test/results/clientpositive/union4.q.out index 0821589138..8524d72afd 100644 --- a/ql/src/test/results/clientpositive/union4.q.out +++ b/ql/src/test/results/clientpositive/union4.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -83,6 +84,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE @@ -98,6 +112,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -121,6 +161,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union6.q.out b/ql/src/test/results/clientpositive/union6.q.out index 8448d8642f..53ffe5a05a 100644 --- a/ql/src/test/results/clientpositive/union6.q.out +++ b/ql/src/test/results/clientpositive/union6.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -78,6 +79,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -95,6 +109,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -118,6 +158,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union_lateralview.q.out b/ql/src/test/results/clientpositive/union_lateralview.q.out index f563476de0..b3762391a9 100644 --- a/ql/src/test/results/clientpositive/union_lateralview.q.out +++ b/ql/src/test/results/clientpositive/union_lateralview.q.out @@ -48,6 +48,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -178,6 +180,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_union_lateral_view + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, arr_ele, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(arr_ele, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -192,6 +209,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, arr_ele, value + Column Types: int, int, string + Table: default.test_union_lateral_view + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_union_lateral_view SELECT b.key, d.arr_ele, d.value FROM ( diff --git a/ql/src/test/results/clientpositive/union_stats.q.out b/ql/src/test/results/clientpositive/union_stats.q.out index 9ea0b519c2..08ac9ee868 100644 --- a/ql/src/test/results/clientpositive/union_stats.q.out +++ b/ql/src/test/results/clientpositive/union_stats.q.out @@ -487,7 +487,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1000 rawDataSize 10624 @@ -522,7 +522,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1000 rawDataSize 10624 diff --git a/ql/src/test/results/clientpositive/vector_bucket.q.out b/ql/src/test/results/clientpositive/vector_bucket.q.out index 3b74023c2b..d155705990 100644 --- a/ql/src/test/results/clientpositive/vector_bucket.q.out +++ b/ql/src/test/results/clientpositive/vector_bucket.q.out @@ -20,6 +20,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +77,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.non_orc_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +110,49 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, string + Table: default.non_orc_table + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: select a, b from non_orc_table order by a PREHOOK: type: QUERY PREHOOK: Input: default@non_orc_table diff --git a/ql/src/test/results/clientpositive/vector_char_4.q.out b/ql/src/test/results/clientpositive/vector_char_4.q.out index 1c58fd209b..34646379fd 100644 --- a/ql/src/test/results/clientpositive/vector_char_4.q.out +++ b/ql/src/test/results/clientpositive/vector_char_4.q.out @@ -137,6 +137,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -148,38 +149,58 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar - Execution mode: vectorized + Select Operator + expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: char(20)), _col3 (type: char(30)), _col4 (type: char(20)), _col5 (type: char(20)), _col6 (type: char(50)) + outputColumnNames: ct, csi, ci, cb, cf, cd, cs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ct, 16), compute_stats(csi, 16), compute_stats(ci, 16), compute_stats(cb, 16), compute_stats(cf, 16), compute_stats(cd, 16), compute_stats(cs, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -203,6 +224,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ct, csi, ci, cb, cf, cd, cs + Column Types: char(10), char(10), char(20), char(30), char(20), char(20), char(50) + Table: default.char_lazy_binary_columnar + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_char_simple.q.out b/ql/src/test/results/clientpositive/vector_char_simple.q.out index 72ea17b724..80c3edfba0 100644 --- a/ql/src/test/results/clientpositive/vector_char_simple.q.out +++ b/ql/src/test/results/clientpositive/vector_char_simple.q.out @@ -212,6 +212,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -244,11 +245,32 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: + Group By Vectorization: + vectorOutput: false + native: false Stage: Stage-0 Stage: Stage-2 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Vectorization: + vectorOutput: false + native: false + PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git a/ql/src/test/results/clientpositive/vector_groupby4.q.out b/ql/src/test/results/clientpositive/vector_groupby4.q.out index 9de8e6eea7..6b54b0c76e 100644 --- a/ql/src/test/results/clientpositive/vector_groupby4.q.out +++ b/ql/src/test/results/clientpositive/vector_groupby4.q.out @@ -35,6 +35,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -143,6 +146,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -157,6 +170,96 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/vector_groupby6.q.out b/ql/src/test/results/clientpositive/vector_groupby6.q.out index 25cf5b2a4f..6cd698a300 100644 --- a/ql/src/test/results/clientpositive/vector_groupby6.q.out +++ b/ql/src/test/results/clientpositive/vector_groupby6.q.out @@ -35,6 +35,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -143,6 +146,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -157,6 +170,96 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/vector_multi_insert.q.out b/ql/src/test/results/clientpositive/vector_multi_insert.q.out index 4013cd4694..c44a10f627 100644 --- a/ql/src/test/results/clientpositive/vector_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/vector_multi_insert.q.out @@ -85,23 +85,18 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-14 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 + Stage-15 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 + Stage-16 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 + Stage-1 depends on stages: Stage-3 Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 - Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19 - Stage-18 - Stage-2 depends on stages: Stage-18, Stage-17, Stage-20 - Stage-16 depends on stages: Stage-2 - Stage-17 - Stage-19 - Stage-20 depends on stages: Stage-19 + Stage-11 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 + Stage-12 depends on stages: Stage-2 + Stage-13 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -125,6 +120,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Filter Operator predicate: ((rn >= 100) and (rn < 1000)) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -140,6 +148,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn2 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (rn >= 1000) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -155,15 +178,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 - Execution mode: vectorized + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -187,6 +239,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn1 + + Stage: Stage-15 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn2 + + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn3 + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -217,15 +290,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -243,40 +307,33 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-21 - Conditional Operator - - Stage: Stage-18 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -288,38 +345,40 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 - Stage: Stage-16 + Stage: Stage-12 Stats-Aggr Operator - Stage: Stage-17 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn3 - - Stage: Stage-19 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn3 - - Stage: Stage-20 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 diff --git a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out index c96ea00a86..4d9ab81ca2 100644 --- a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out +++ b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out @@ -96,6 +96,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -108,38 +109,58 @@ STAGE PLANS: alias: alltypesorc Row Limit Per Split: 1 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: 17.29 (type: decimal(18,9)) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [12] - selectExpressions: ConstantVectorExpression(val 17.29) -> 12:decimal(18,9) Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.decimal_2 - Execution mode: vectorized + Select Operator + expressions: 17.29 (type: decimal(18,9)) + outputColumnNames: t + Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(t, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 684 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 684 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -163,6 +184,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: t + Column Types: decimal(18,9) + Table: default.decimal_2 + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_udf_character_length.q.out b/ql/src/test/results/clientpositive/vector_udf_character_length.q.out index 81d801c930..601ed7a793 100644 --- a/ql/src/test/results/clientpositive/vector_udf_character_length.q.out +++ b/ql/src/test/results/clientpositive/vector_udf_character_length.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -71,7 +72,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Execution mode: vectorized + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +121,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_udf_octet_length.q.out b/ql/src/test/results/clientpositive/vector_udf_octet_length.q.out index c71cfef83f..08844a7798 100644 --- a/ql/src/test/results/clientpositive/vector_udf_octet_length.q.out +++ b/ql/src/test/results/clientpositive/vector_udf_octet_length.q.out @@ -31,6 +31,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -54,7 +55,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Execution mode: vectorized + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -78,6 +104,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_varchar_4.q.out b/ql/src/test/results/clientpositive/vector_varchar_4.q.out index 205c67a6ae..45e1322297 100644 --- a/ql/src/test/results/clientpositive/vector_varchar_4.q.out +++ b/ql/src/test/results/clientpositive/vector_varchar_4.q.out @@ -137,6 +137,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -148,38 +149,58 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.varchar_lazy_binary_columnar - Execution mode: vectorized + Select Operator + expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: varchar(20)), _col3 (type: varchar(30)), _col4 (type: varchar(20)), _col5 (type: varchar(20)), _col6 (type: varchar(50)) + outputColumnNames: vt, vsi, vi, vb, vf, vd, vs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(vt, 16), compute_stats(vsi, 16), compute_stats(vi, 16), compute_stats(vb, 16), compute_stats(vf, 16), compute_stats(vd, 16), compute_stats(vs, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -203,6 +224,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: vt, vsi, vi, vb, vf, vd, vs + Column Types: varchar(10), varchar(10), varchar(20), varchar(30), varchar(20), varchar(20), varchar(50) + Table: default.varchar_lazy_binary_columnar + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_varchar_simple.q.out b/ql/src/test/results/clientpositive/vector_varchar_simple.q.out index 0f8bdb58c3..6fe98fe319 100644 --- a/ql/src/test/results/clientpositive/vector_varchar_simple.q.out +++ b/ql/src/test/results/clientpositive/vector_varchar_simple.q.out @@ -276,6 +276,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -309,7 +310,6 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: vectorized Map Vectorization: @@ -344,6 +344,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.varchar_3 + Select Operator + expressions: _col0 (type: varchar(25)) + outputColumnNames: field + Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(field, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -356,7 +375,48 @@ STAGE PLANS: name: default.varchar_3 Stage: Stage-2 - Stats-Aggr Operator + Column Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: field + Column Types: varchar(25) + Table: default.varchar_3 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/vectorized_context.q.out b/ql/src/test/results/clientpositive/vectorized_context.q.out index dac9b6b143..09efe0933a 100644 --- a/ql/src/test/results/clientpositive/vectorized_context.q.out +++ b/ql/src/test/results/clientpositive/vectorized_context.q.out @@ -109,29 +109,14 @@ STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:store_sales + $hdt$_1:household_demographics Fetch Operator limit: -1 - $hdt$_2:household_demographics + $hdt$_2:store Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:store_sales - TableScan - alias: store_sales - Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_store_sk is not null and ss_hdemo_sk is not null) (type: boolean) - Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_store_sk (type: int), ss_hdemo_sk (type: int), ss_net_profit (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - $hdt$_2:household_demographics + $hdt$_1:household_demographics TableScan alias: household_demographics Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE @@ -146,10 +131,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: + $hdt$_2:store TableScan alias: store Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE @@ -160,24 +142,42 @@ STAGE PLANS: expressions: s_store_sk (type: int), s_city (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_store_sk is not null and ss_hdemo_sk is not null) (type: boolean) + Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_store_sk (type: int), ss_hdemo_sk (type: int), ss_net_profit (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 + outputColumnNames: _col0, _col2 Statistics: Num rows: 6682 Data size: 80009 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col2, _col5 Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col2 (type: double) + expressions: _col5 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE Limit